diff --git a/.gitignore b/.gitignore
index 1b4f1b656..68b631bc1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -152,5 +152,8 @@ debug*/
*.dat
*.tsv
*.gz
+*.csv
+*.p
+*.pdf
cache/
diff --git a/README.md b/README.md
index 69c58aa9c..823a2a6e1 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,9 @@
# LAVIS - A Library for Language-Vision Intelligence
## What's New: 🎉
+ * [Model Release] November 2023, released implementation of **X-InstructBLIP**
+ [Paper](https://arxiv.org/pdf/2311.18799.pdf), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/xinstructblip), [Website](https://artemisp.github.io/X-InstructBLIP-page/), [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/salesforce/LAVIS/blob/main/projects/xinstructblip/demo/run_demo.ipynb)
+ > A simple, yet effective, cross-modality framework built atop frozen LLMs that allows the integration of various modalities (image, video, audio, 3D) without extensive modality-specific customization.
* [Model Release] July 2023, released implementation of **BLIP-Diffusion**
[Paper](https://arxiv.org/abs/2305.06500), [Project Page](https://github.com/salesforce/LAVIS/tree/main/projects/blip-diffusion), [Website](https://dxli94.github.io/BLIP-Diffusion-website/)
> A text-to-image generation model that trains 20x than DreamBooth. Also facilitates zero-shot subject-driven generation and editing.
diff --git a/assets/LAVIS_technical_report.pdf b/assets/LAVIS_technical_report.pdf
deleted file mode 100644
index 14f148e39..000000000
Binary files a/assets/LAVIS_technical_report.pdf and /dev/null differ
diff --git a/lavis/common/utils.py b/lavis/common/utils.py
index 616588e7c..c714cc988 100644
--- a/lavis/common/utils.py
+++ b/lavis/common/utils.py
@@ -1,5 +1,5 @@
"""
- Copyright (c) 2022, salesforce.com, inc.
+ Copyright (c) 2023, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
@@ -440,3 +440,16 @@ def get_file_size(filename):
"""
size_in_mb = os.path.getsize(filename) / float(1024**2)
return size_in_mb
+
+def is_serializable(value):
+ """
+ This function checks if the provided value can be serialized into a JSON string.
+ """
+ try:
+ json.dumps(value)
+ return True
+ except (TypeError, OverflowError):
+ return False
+
+def is_convertible_to_int(value):
+ return bool(re.match(r'^-?\d+$', str(value)))
\ No newline at end of file
diff --git a/lavis/configs/datasets/aokvqa/defaults_instruct.yaml b/lavis/configs/datasets/aokvqa/defaults_instruct.yaml
new file mode 100644
index 000000000..3d6dc37f7
--- /dev/null
+++ b/lavis/configs/datasets/aokvqa/defaults_instruct.yaml
@@ -0,0 +1,52 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ aok_vqa_instruct:
+ # data_dir: ${env.data_dir}/datasets
+ data_type: images # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: "clip_image_train"
+ image_size: 224
+ eval:
+ name: "clip_image_eval"
+ image_size: 224
+
+ text_processor:
+ train:
+ name: blip_instruction
+ modality: image
+ task: qa
+ eval:
+ name: blip_question
+
+ build_info:
+ # Be careful not to append minus sign (-) before split to avoid itemizing
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/aokvqa/aokvqa_v1p0_train.json
+ storage:
+ - aokvqa/annotations/aokvqa_v1p0_train.json
+ # val:
+ # url:
+ # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/aokvqa/aokvqa_v1p0_val.json
+ # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/aokvqa/specialized_vocab_train.json
+ # storage:
+ # - aokvqa/annotations/aokvqa_v1p0_val.json
+ # - aokvqa/annotations/specialized_vocab_train_lavis.json
+ # # - aokvqa/annotations/large_vocab_train_lavis.json
+ # test:
+ # url:
+ # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/aokvqa/aokvqa_v1p0_test.json
+ # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/aokvqa/specialized_vocab_train.json
+ # storage:
+ # - aokvqa/annotations/aokvqa_v1p0_test.json
+ # - aokvqa/annotations/specialized_vocab_train_lavis.json
+ images:
+ # storage: /coco/images
+ storage: /export/share/datasets/vision/coco/images
diff --git a/lavis/configs/datasets/audiocaps/defaults_mm_cap.yaml b/lavis/configs/datasets/audiocaps/defaults_mm_cap.yaml
new file mode 100644
index 000000000..76a05788d
--- /dev/null
+++ b/lavis/configs/datasets/audiocaps/defaults_mm_cap.yaml
@@ -0,0 +1,49 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ audiocaps_mm_caption: # name of the dataset builder
+ audio_processor:
+ train:
+ name: beats_audio
+ sampling_rate: 16000
+ eval:
+ name: beats_audio
+ sampling_rate: 16000
+
+ text_processor:
+ train:
+ name: "blip_instruction"
+ modality: audio
+ task: caption
+ eval:
+ name: "blip_caption"
+
+ data_type: [audio]
+
+ build_info:
+ kwargs:
+ missing_ids: [2sh7ZkazyO8, 966jA2-z0mQ, 52RlolYyjAE, HVAc9hm4jjk, 8lPjqvYWNyM, eXgPnnE3TuQ]
+ annotations:
+ train:
+ url:
+ - https://raw.githubusercontent.com/cdjkim/audiocaps/master/dataset/train.csv
+ storage:
+ - audiocaps/annotations/train.csv
+
+ val:
+ url:
+ - https://raw.githubusercontent.com/cdjkim/audiocaps/master/dataset/val.csv
+ storage:
+ - audiocaps/annotations/val.csv
+
+ test:
+ url:
+ - https://raw.githubusercontent.com/cdjkim/audiocaps/master/dataset/test.csv
+ storage:
+ - audiocaps/annotations/test.csv
+
+ audio:
+ storage: /export/einstein-vision/audio_datasets/audiocaps/AUDIOCAPS_32000Hz/audio
\ No newline at end of file
diff --git a/lavis/configs/datasets/audiocaps/defaults_mm_cap_instruct.yaml b/lavis/configs/datasets/audiocaps/defaults_mm_cap_instruct.yaml
new file mode 100644
index 000000000..27cf6b06e
--- /dev/null
+++ b/lavis/configs/datasets/audiocaps/defaults_mm_cap_instruct.yaml
@@ -0,0 +1,52 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ audiocaps_mm_caption_instruct: # name of the dataset builder
+ audio_processor:
+ train:
+ name: beats_audio
+ sampling_rate: 16000
+ eval:
+ name: beats_audio
+ sampling_rate: 16000
+
+ text_processor:
+ train:
+ name: "blip_instruction"
+ modality: audio
+ task: caption
+ eval:
+ name: "blip_caption"
+
+ data_type: [audio]
+
+ missing_ids: [2sh7ZkazyO8, 966jA2-z0mQ, 52RlolYyjAE, HVAc9hm4jjk, 8lPjqvYWNyM, eXgPnnE3TuQ]
+
+ build_info:
+ kwargs:
+ cached: False
+ cached_dir: /export/einstein-vision/audio_datasets/audiocaps/beats_features
+ annotations:
+ train:
+ url:
+ - https://raw.githubusercontent.com/cdjkim/audiocaps/master/dataset/train.csv
+ storage:
+ - audiocaps/annotations/train.csv
+
+ # val:
+ # url:
+ # - https://raw.githubusercontent.com/cdjkim/audiocaps/master/dataset/val.csv
+ # storage:
+ # - audiocaps/annotations/val.csv
+
+ # test:
+ # url:
+ # - https://raw.githubusercontent.com/cdjkim/audiocaps/master/dataset/test.csv
+ # storage:
+ # - /export/einstein-vision/audio_datasets/audiocaps/dataset/test.csv
+
+ audio:
+ storage: /export/einstein-vision/audio_datasets/audiocaps/AUDIOCAPS_32000Hz/audio
\ No newline at end of file
diff --git a/lavis/configs/datasets/audiocaps/defaults_mm_qa.yaml b/lavis/configs/datasets/audiocaps/defaults_mm_qa.yaml
new file mode 100644
index 000000000..04b9ae39b
--- /dev/null
+++ b/lavis/configs/datasets/audiocaps/defaults_mm_qa.yaml
@@ -0,0 +1,51 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ audiocaps_mm_qa: # name of the dataset builder
+ audio_processor:
+ train:
+ name: beats_audio
+ sampling_rate: 16000
+ eval:
+ name: beats_audio
+ sampling_rate: 16000
+ is_eval: True
+
+ text_processor:
+ train:
+ name: "blip_instruction"
+ modality: audio
+ task: qa
+ eval:
+ name: "blip_question"
+
+ data_type: [audio]
+
+ build_info:
+ kwargs:
+ cached: False
+ # add_binary: True
+ cached_dir: /export/einstein-vision/audio_datasets/audiocaps/beats_features
+ missing_ids: [2sh7ZkazyO8, 966jA2-z0mQ, 52RlolYyjAE, HVAc9hm4jjk, 8lPjqvYWNyM, eXgPnnE3TuQ]
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/audiocaps/audio_qa_final_train.csv
+ # - /export/home/LAVIS-xgen_mm/projects/xinstructblip/data_aug/audio_qa_data/audio_qa_final_train.csv
+ storage:
+ - audiocaps_qa/annotations/train.csv
+ # - /export/home/LAVIS-xgen_mm/projects/xinstructblip/data_aug/audio_qa_data/audio_qa_final_train.csv
+
+ # val:
+ # url:
+ # # - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/audiocaps/audio_qa_final_val.csv
+ # - /export/home/LAVIS-xgen_mm/projects/xinstructblip/data_aug/audio_qa_data/audio_qa_final_val.csv
+ # storage:
+ # # - audiocaps_qa/annotations/val.csv
+ # - /export/home/LAVIS-xgen_mm/projects/xinstructblip/data_aug/audio_qa_data/audio_qa_final_val.csv
+
+ audio:
+ storage: /export/einstein-vision/audio_datasets/audiocaps/AUDIOCAPS_32000Hz/audio
\ No newline at end of file
diff --git a/lavis/configs/datasets/audioset/defaults_mm_cap.yaml b/lavis/configs/datasets/audioset/defaults_mm_cap.yaml
new file mode 100644
index 000000000..c00f9aa8f
--- /dev/null
+++ b/lavis/configs/datasets/audioset/defaults_mm_cap.yaml
@@ -0,0 +1,47 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ audioset_mm_caption: # 14141
+ audio_processor:
+ train:
+ name: beats_audio
+ sampling_rate: 16000
+ eval:
+ name: beats_audio
+ sampling_rate: 16000
+ is_eval: False
+
+ text_processor:
+ train:
+ name: blip_instruction
+ modality: audio
+ task: classification
+ eval:
+ name: blip_caption
+
+ data_type: [audio]
+
+ build_info:
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data//audioset/balanced_train_clean.csv
+ # - /export/home/LAVIS-xgen_mm/lavis/configs/datasets/audioset/balanced_train_clean.csv
+ - http://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/class_labels_indices.csv
+ storage: # both files are kept under audioset/annotations/
+ - audioset/annotations/balanced_train_clean.csv
+ # - /export/home/LAVIS-xgen_mm/lavis/configs/datasets/audioset/balanced_train_clean.csv
+ - audioset/annotations/class_labels_indices.csv
+
+ # val:
+ # url:
+ # - http://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/eval_segments.csv
+ # - http://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/class_labels_indices.csv
+ # storage:
+ # - audioset/annotations/eval_segments.csv
+ # - audioset/annotations/class_labels_indices.csv
+ audio:
+ storage: /export/einstein-vision/audio_datasets/AudioSet/all_audio
\ No newline at end of file
diff --git a/lavis/configs/datasets/audioset/defaults_mm_cap_instruct.yaml b/lavis/configs/datasets/audioset/defaults_mm_cap_instruct.yaml
new file mode 100644
index 000000000..2b0c7746f
--- /dev/null
+++ b/lavis/configs/datasets/audioset/defaults_mm_cap_instruct.yaml
@@ -0,0 +1,48 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ audioset_mm_caption_instruct: # 14141
+ audio_processor:
+ train:
+ name: beats_audio
+ sampling_rate: 16000
+ eval:
+ name: beats_audio
+ sampling_rate: 16000
+ is_eval: False
+
+ text_processor:
+ train:
+ name: blip_instruction
+ modality: audio
+ task: classification
+ eval:
+ name: blip_caption
+
+ data_type: [audio]
+
+ build_info:
+ annotations:
+ train:
+ url: # public download locations; the internal local path is kept only as a commented-out alternative
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data//audioset/balanced_train_clean.csv
+ # - /export/home/LAVIS-xgen_mm/lavis/configs/datasets/audioset/balanced_train_clean.csv
+ - http://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/class_labels_indices.csv
+ storage:
+ - audioset/annotations/balanced_train_clean.csv
+ # - /export/home/LAVIS-xgen_mm/lavis/configs/datasets/audioset/balanced_train_clean.csv
+ - audioset/annotations/class_labels_indices.csv
+
+ # val:
+ # url:
+ # - http://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/eval_segments.csv
+ # - http://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/class_labels_indices.csv
+ # storage:
+ # - audioset/annotations/eval_segments.csv
+ # - audioset/annotations/class_labels_indices.csv
+
+ audio:
+ storage: /export/einstein-vision/audio_datasets/AudioSet/all_audio
\ No newline at end of file
diff --git a/lavis/configs/datasets/avsd/defaults_mm_dial_instruct.yaml b/lavis/configs/datasets/avsd/defaults_mm_dial_instruct.yaml
new file mode 100644
index 000000000..2a151652d
--- /dev/null
+++ b/lavis/configs/datasets/avsd/defaults_mm_dial_instruct.yaml
@@ -0,0 +1,65 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ avsd_mm_dialogue_instruct: # name of the dataset builder
+ data_type: [video, audio]
+
+ audio_processor:
+ train:
+ name: beats_audio
+ sampling_rate: 16000
+ eval:
+ name: beats_audio
+ sampling_rate: 16000
+
+ video_processor:
+ train:
+ name: alpro_video_train
+ n_frms: 4
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: True
+ eval:
+ name: alpro_video_eval
+ n_frms: 4
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: True
+
+ text_processor:
+ train:
+ name: "blip_caption"
+ eval:
+ name: "blip_caption"
+
+ build_info:
+ # Be careful not to append minus sign (-) before split to avoid itemizing
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-vision-language-research/datasets/avsd_dstc7_train.json
+ storage:
+ - avsd/annotations/train.json
+ val:
+ url:
+ - https://storage.googleapis.com/sfr-vision-language-research/datasets/avsd_dstc7_val.json
+ storage:
+ - avsd/annotations/val.json
+ test:
+ url:
+ - https://storage.googleapis.com/sfr-vision-language-research/datasets/avsd_dstc7_test.json
+ storage:
+ - avsd/annotations/test.json
+ templates: null
+
+ audio:
+ storage: /export/video-language-dataset/data/charade/videos
+
+ video:
+ storage: /export/video-language-dataset/data/charade/videos
+
diff --git a/lavis/configs/datasets/capfilt14m/defaults_cap.yaml b/lavis/configs/datasets/capfilt14m/defaults_cap.yaml
new file mode 100644
index 000000000..7d2821c7b
--- /dev/null
+++ b/lavis/configs/datasets/capfilt14m/defaults_cap.yaml
@@ -0,0 +1,30 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ capfilt14m: # 13873136
+ # data_dir: ${env.data_dir}/datasets
+ data_type: images # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: "clip_image_train"
+ image_size: 224
+ text_processor:
+ train:
+ name: blip_caption
+
+ build_info:
+ # Be careful not to append minus sign (-) before split to avoid itemizing
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/capfilt14m/annotation.json
+ # - /export/share/datasets/vision_language/capfilt_14m_new/annotation.json
+ storage:
+ - capfilt14m/annotations/annotation.json
+ # - /export/share/datasets/vision_language/capfilt_14m_new/annotation.json
+ images:
+ storage: /export/share/datasets/vision/coco/images
\ No newline at end of file
diff --git a/lavis/configs/datasets/capfilt14m/defaults_cap_instruct.yaml b/lavis/configs/datasets/capfilt14m/defaults_cap_instruct.yaml
new file mode 100644
index 000000000..ec4e59e8f
--- /dev/null
+++ b/lavis/configs/datasets/capfilt14m/defaults_cap_instruct.yaml
@@ -0,0 +1,34 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ capfilt14m_instruct: # 13873136
+ # data_dir: ${env.data_dir}/datasets
+ data_type: images # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: "clip_image_train"
+ image_size: 224
+
+ text_processor:
+ train:
+ name: blip_instruction
+ modality: image
+ task: caption
+
+ build_info:
+ # Be careful not to append minus sign (-) before split to avoid itemizing
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/capfilt14m/annotation.json
+ # - /export/share/datasets/vision_language/capfilt_14m_new/annotation.json
+ storage:
+ - capfilt14m/annotations/annotation.json
+ # - /export/share/datasets/vision_language/capfilt_14m_new/annotation.json
+
+ images:
+ storage: /export/share/datasets/vision/coco/images
\ No newline at end of file
diff --git a/lavis/configs/datasets/charade/defaults_cap.yaml b/lavis/configs/datasets/charade/defaults_cap.yaml
new file mode 100644
index 000000000..c28a1ada2
--- /dev/null
+++ b/lavis/configs/datasets/charade/defaults_cap.yaml
@@ -0,0 +1,52 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ charade_caption: # name of the dataset builder
+ # data_dir: ${env.data_dir}/datasets
+ data_type: videos # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: alpro_video_train
+ n_frms: 4
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: False
+ eval:
+ name: alpro_video_eval
+ n_frms: 4
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: False
+
+ text_processor:
+ train:
+ name: blip_caption
+ eval:
+ name: blip_caption
+
+
+ build_info:
+ # Be careful not to append minus sign (-) before split to avoid itemizing
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/charade/train_lavis.json
+ # - /export/video-language-dataset/data/charade/train_lavis.json
+ storage:
+ - charade/annotations/train.json
+ # - /export/video-language-dataset/data/charade/train_lavis.json
+ val:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/charade/val_lavis.json
+ # - /export/video-language-dataset/data/charade/val_lavis.json
+ storage:
+ - charade/annotations/val.json
+ # - /export/video-language-dataset/data/charade/val_lavis.json
+ videos:
+ storage: /export/video-language-dataset/data/charade/videos
diff --git a/lavis/configs/datasets/charade/defaults_cap_instruct.yaml b/lavis/configs/datasets/charade/defaults_cap_instruct.yaml
new file mode 100644
index 000000000..f1a7ba82c
--- /dev/null
+++ b/lavis/configs/datasets/charade/defaults_cap_instruct.yaml
@@ -0,0 +1,54 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ charade_caption_instruct: # name of the dataset builder
+ # data_dir: ${env.data_dir}/datasets
+ data_type: videos # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: alpro_video_train
+ n_frms: 4
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: False
+ eval:
+ name: alpro_video_eval
+ n_frms: 4
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: False
+
+
+ text_processor:
+ train:
+ name: blip_instruction
+ modality: video
+ task: caption
+ eval:
+ name: blip_caption
+
+ build_info:
+ # Be careful not to append minus sign (-) before split to avoid itemizing
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/charade/train_lavis.json
+ # - /export/video-language-dataset/data/charade/train_lavis.json
+ storage:
+ - charade/annotations/train.json
+ # - /export/video-language-dataset/data/charade/train_lavis.json
+ val:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/charade/val_lavis.json
+ # - /export/video-language-dataset/data/charade/val_lavis.json
+ storage:
+ - charade/annotations/val.json
+ # - /export/video-language-dataset/data/charade/val_lavis.json
+ videos:
+ storage: /export/video-language-dataset/data/charade/videos
diff --git a/lavis/configs/datasets/clotho/defaults_mm_cap.yaml b/lavis/configs/datasets/clotho/defaults_mm_cap.yaml
new file mode 100644
index 000000000..e04f2b814
--- /dev/null
+++ b/lavis/configs/datasets/clotho/defaults_mm_cap.yaml
@@ -0,0 +1,41 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ clothov2: # name of the dataset builder
+ audio_processor:
+ train:
+ name: beats_audio
+ eval:
+ name: beats_audio
+
+ text_processor:
+ train:
+ name: "blip_caption"
+ eval:
+ name: "blip_caption"
+
+
+ data_type: [audio]
+
+ build_info:
+ kwargs:
+ clotho_root: /export/einstein-vision/audio_datasets/clothov2/
+ split: eval
+
+ annotations:
+ train:
+ url:
+ - https://zenodo.org/record/4783391/files/clotho_captions_development.csv
+ storage:
+ - clothov2/annotations/clotho_captions_development.csv
+ val:
+ url:
+ - https://zenodo.org/record/4783391/files/clotho_captions_evaluation.csv
+ storage:
+ - clothov2/annotations/clotho_captions_evaluation.csv
+ audio:
+ storage: /export/einstein-vision/audio_datasets/clothov2/CLOTHO_v2.1/clotho_audio_files/
+
\ No newline at end of file
diff --git a/lavis/configs/datasets/clotho/defaults_mm_cap_instruct.yaml b/lavis/configs/datasets/clotho/defaults_mm_cap_instruct.yaml
new file mode 100644
index 000000000..5a4b755b2
--- /dev/null
+++ b/lavis/configs/datasets/clotho/defaults_mm_cap_instruct.yaml
@@ -0,0 +1,42 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ clothov2_instruct: # name of the dataset builder
+ audio_processor:
+ train:
+ name: beats_audio
+ eval:
+ name: beats_audio
+
+ text_processor:
+ train:
+ name: "blip_instruction"
+ modality: audio
+ task: caption
+ eval:
+ name: "blip_caption"
+
+ data_type: [audio]
+
+ build_info:
+ kwargs:
+ clotho_root: /export/einstein-vision/audio_datasets/clothov2/
+ split: eval
+
+ annotations:
+ train:
+ url:
+ - https://zenodo.org/record/4783391/files/clotho_captions_development.csv
+ storage:
+ - clothov2/annotations/clotho_captions_development.csv
+ val:
+ url:
+ - https://zenodo.org/record/4783391/files/clotho_captions_evaluation.csv
+ storage:
+ - clothov2/annotations/clotho_captions_evaluation.csv
+ audio:
+ storage: /export/einstein-vision/audio_datasets/clothov2/CLOTHO_v2.1/clotho_audio_files/
+
\ No newline at end of file
diff --git a/lavis/configs/datasets/clotho/defaults_mm_qa.yaml b/lavis/configs/datasets/clotho/defaults_mm_qa.yaml
new file mode 100644
index 000000000..e7d97651b
--- /dev/null
+++ b/lavis/configs/datasets/clotho/defaults_mm_qa.yaml
@@ -0,0 +1,44 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ clotho_qa: # name of the dataset builder
+ audio_processor:
+ train:
+ name: beats_audio
+ eval:
+ name: beats_audio
+
+ text_processor:
+ train:
+ name: "blip_caption"
+ eval:
+ name: "blip_caption"
+
+
+ data_type: [audio]
+
+ build_info:
+
+ annotations:
+ train:
+ url:
+ - https://zenodo.org/records/6473207/files/clotho_aqa_train.csv
+ storage: # same lowercase clotho_qa/ directory as the val and test splits
+ - clotho_qa/annotations/clotho_aqa_train.csv
+ val:
+ url:
+ - https://zenodo.org/records/6473207/files/clotho_aqa_val.csv
+ storage:
+ - clotho_qa/annotations/clotho_aqa_val.csv
+
+ test:
+ url:
+ - https://zenodo.org/records/6473207/files/clotho_aqa_test.csv
+ storage:
+ - clotho_qa/annotations/clotho_aqa_test.csv
+ audio:
+ storage: /export/einstein-vision/audio_datasets/clotho-aqa/audio_files
+
\ No newline at end of file
diff --git a/lavis/configs/datasets/coco/defaults_cap_instruct.yaml b/lavis/configs/datasets/coco/defaults_cap_instruct.yaml
new file mode 100644
index 000000000..bce779906
--- /dev/null
+++ b/lavis/configs/datasets/coco/defaults_cap_instruct.yaml
@@ -0,0 +1,44 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ coco_caption_instruct: # name of the dataset builder
+ dataset_card: dataset_card/coco_caption.md
+ # data_dir: ${env.data_dir}/datasets
+ data_type: images # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: "clip_image_train"
+ image_size: 224
+ eval:
+ name: "clip_image_eval"
+ image_size: 224
+
+ text_processor:
+ train:
+ name: blip_instruction
+ modality: image
+ task: caption
+ eval:
+ name: blip_caption
+
+ build_info:
+ # Be careful not to append minus sign (-) before split to avoid itemizing
+ annotations:
+ train:
+ url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json
+ md5: aa31ac474cf6250ebb81d18348a07ed8
+ storage: coco/annotations/coco_karpathy_train.json
+ # val:
+ # url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_val.json
+ # md5: b273847456ef5580e33713b1f7de52a0
+ # storage: coco/annotations/coco_karpathy_val.json
+ # test:
+ # url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_test.json
+ # md5: 3ff34b0ef2db02d01c37399f6a2a6cd1
+ # storage: coco/annotations/coco_karpathy_test.json
+ images:
+ storage: /export/share/datasets/vision/coco/images
diff --git a/lavis/configs/datasets/coco/defaults_vqa_instruct.yaml b/lavis/configs/datasets/coco/defaults_vqa_instruct.yaml
new file mode 100644
index 000000000..5a85202b3
--- /dev/null
+++ b/lavis/configs/datasets/coco/defaults_vqa_instruct.yaml
@@ -0,0 +1,57 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ coco_vqa_instruct:
+ # data_dir: ${env.data_dir}/datasets
+ data_type: images # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: "clip_image_train"
+ image_size: 224
+ eval:
+ name: "clip_image_eval"
+ image_size: 224
+
+ text_processor:
+ train:
+ name: blip_instruction
+ modality: image
+ task: qa
+ eval:
+ name: blip_caption
+
+ build_info:
+ # Be careful not to append minus sign (-) before split to avoid itemizing
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json
+ - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val.json
+ storage:
+ - coco/annotations/vqa_train.json
+ - coco/annotations/vqa_val.json
+ # val:
+ # url:
+ # # TODO make this order insensitive
+ # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val_eval.json
+ # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/answer_list.json
+ # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/v2_OpenEnded_mscoco_val2014_questions.json
+ # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/v2_mscoco_val2014_annotations.json
+ # storage:
+ # - coco/annotations/vqa_val_eval.json
+ # - coco/annotations/answer_list.json
+ # - coco/annotations/v2_OpenEnded_mscoco_val2014_questions.json
+ # - coco/annotations/v2_mscoco_val2014_annotations.json
+ # test:
+ # url:
+ # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_test.json
+ # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/answer_list.json
+ # storage:
+ # - coco/annotations/vqa_test.json
+ # - coco/annotations/answer_list.json
+ images:
+ storage: /export/share/datasets/vision/coco/images
diff --git a/lavis/configs/datasets/coin/defaults_cap.yaml b/lavis/configs/datasets/coin/defaults_cap.yaml
new file mode 100644
index 000000000..c783102da
--- /dev/null
+++ b/lavis/configs/datasets/coin/defaults_cap.yaml
@@ -0,0 +1,51 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ coin_caption: # name of the dataset builder
+ # data_dir: ${env.data_dir}/datasets
+ data_type: videos # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: alpro_video_train
+ n_frms: 4
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: False
+ eval:
+ name: alpro_video_eval
+ n_frms: 4
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: False
+
+ text_processor:
+ train:
+ name: blip_caption
+ eval:
+ name: blip_caption
+
+ build_info:
+ # Be careful not to append minus sign (-) before split to avoid itemizing
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/coin/train.json
+ # - /export/video-language-dataset/data/coin/annotations/train_lavis.json
+ storage:
+ - coin/annotations/train.json
+ # - /export/video-language-dataset/data/coin/annotations/train_lavis.json
+ val:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/coin/val.json
+ # - /export/video-language-dataset/data/coin/annotations/val_lavis.json
+ storage:
+ - coin/annotations/val.json
+ # - /export/video-language-dataset/data/coin/annotations/val_lavis.json
+ videos:
+ storage: /export/video-language-dataset/data/coin/annotations/videos/
diff --git a/lavis/configs/datasets/coin/defaults_cap_instruct.yaml b/lavis/configs/datasets/coin/defaults_cap_instruct.yaml
new file mode 100644
index 000000000..e1f8c8f46
--- /dev/null
+++ b/lavis/configs/datasets/coin/defaults_cap_instruct.yaml
@@ -0,0 +1,53 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ coin_caption_instruct: # name of the dataset builder
+ # data_dir: ${env.data_dir}/datasets
+ data_type: videos # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: alpro_video_train
+ n_frms: 4
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: False
+ eval:
+ name: alpro_video_eval
+ n_frms: 4
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: False
+
+ text_processor:
+ train:
+ name: blip_instruction
+ task: caption
+ modality: image  # NOTE(review): data_type is videos and charade_caption_instruct uses `modality: video` - confirm `image` is intended
+ eval:
+ name: blip_caption
+
+ build_info:
+ # Be careful not to append minus sign (-) before split to avoid itemizing
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/coin/train.json
+ # - /export/video-language-dataset/data/coin/annotations/train_lavis.json
+ storage:
+ - coin/annotations/train.json
+ # - /export/video-language-dataset/data/coin/annotations/train_lavis.json
+ val:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/coin/val.json
+ # - /export/video-language-dataset/data/coin/annotations/val_lavis.json
+ storage:
+ - coin/annotations/val.json
+ # - /export/video-language-dataset/data/coin/annotations/val_lavis.json
+ videos:
+ storage: /export/video-language-dataset/data/coin/annotations/videos/
diff --git a/lavis/configs/datasets/conceptual_caption/defaults_12m_instruct.yaml b/lavis/configs/datasets/conceptual_caption/defaults_12m_instruct.yaml
new file mode 100644
index 000000000..1c576bbcc
--- /dev/null
+++ b/lavis/configs/datasets/conceptual_caption/defaults_12m_instruct.yaml
@@ -0,0 +1,37 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ conceptual_caption_12m_instruct:
+ # data_dir: ${env.data_dir}/datasets
+ data_type: images # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: "clip_image_train"
+ image_size: 224
+ eval:
+ name: "clip_image_eval"
+ image_size: 224
+
+ text_processor:
+ train:
+ name: blip_instruction
+ task: caption
+ modality: image
+ eval:
+ name: blip_caption
+
+ build_info:
+ # Do not put a minus sign (-) in front of a split name below; that would turn the split into a YAML list item.
+ annotations:
+ train:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/cc12m/x_instructblip_clean.json
+ # - /export/home/workspace/datasets/cc12m.json
+ storage:
+ - conceptual_caption/annotations/cc12m.json
+ images:
+ storage: conceptual_caption/images_12m
diff --git a/lavis/configs/datasets/conceptual_caption/defaults_3m_instruct.yaml b/lavis/configs/datasets/conceptual_caption/defaults_3m_instruct.yaml
new file mode 100644
index 000000000..05f2b523a
--- /dev/null
+++ b/lavis/configs/datasets/conceptual_caption/defaults_3m_instruct.yaml
@@ -0,0 +1,36 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ conceptual_caption_3m_instruct:
+ # data_dir: ${env.data_dir}/datasets
+ data_type: images # [images|videos|features]
+
+ vis_processor:
+ train:
+ name: "clip_image_train"
+ image_size: 224
+ eval:
+ name: "clip_image_eval"
+ image_size: 224
+
+ text_processor:
+ train:
+ name: blip_instruction
+ task: caption
+ modality: image
+ eval:
+ name: blip_caption
+
+ build_info:
+ # Do not put a minus sign (-) in front of a split name below; that would turn the split into a YAML list item.
+ annotations:
+ train:
+ url:
+ - /export/home/workspace/datasets/cc3m.json
+ storage:
+ - conceptual_caption/annotations/cc3m.json
+ images:
+ storage: conceptual_caption/images
diff --git a/lavis/configs/datasets/discriminatory_reasoning/defaults_mm_audio_video.yaml b/lavis/configs/datasets/discriminatory_reasoning/defaults_mm_audio_video.yaml
new file mode 100644
index 000000000..4b11dbd09
--- /dev/null
+++ b/lavis/configs/datasets/discriminatory_reasoning/defaults_mm_audio_video.yaml
@@ -0,0 +1,63 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ audio_video_discrn:
+ # data_dir: ${env.data_dir}/datasets
+ audio_processor:
+ train:
+ name: beats_audio
+ n_frames: 2
+ eval:
+ name: beats_audio
+ n_frames: 2
+
+ text_processor:
+ train:
+ name: "blip_caption"
+ eval:
+ name: "blip_caption"
+
+ video_processor:
+ train:
+ name: alpro_video_train
+ n_frms: 2
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: True
+ eval:
+ name: alpro_video_eval
+ n_frms: 2
+ image_size: 224
+ min_scale: 0.9
+ max_scale: 1.0
+ full_video: True
+
+ data_type: [video, audio] # [images|videos|features]
+
+ build_info:
+ kwargs:
+ total: all
+ shuffle_modalities: False
+ balance_labels: True
+ dataset_name: audiocaps
+ ground_truth: False
+ raw: False
+
+ # Do not put a minus sign (-) in front of a split name below; that would turn the split into a YAML list item.
+ annotations:
+ val:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/discrn/audiocaps.json
+ # - /export/home/LAVIS-xgen_mm/lavis/configs/datasets/discriminatory_reasoning/discriminatory_dataset/audiocaps_discrn.json
+ storage:
+ - discrn/annotations/audiocaps.json
+ # - /export/home/LAVIS-xgen_mm/lavis/configs/datasets/discriminatory_reasoning/discriminatory_dataset/audiocaps_discrn.json
+
+ audio:
+ storage: /export/einstein-vision/audio_datasets/audiocaps/AUDIOCAPS_32000Hz/audio/val
+ video:
+ storage: /export/einstein-vision/audio_datasets/audiocaps/video/AUDIOCAPS_32000Hz/audio/val
\ No newline at end of file
diff --git a/lavis/configs/datasets/discriminatory_reasoning/defaults_mm_image_pc.yaml b/lavis/configs/datasets/discriminatory_reasoning/defaults_mm_image_pc.yaml
new file mode 100644
index 000000000..2389aa754
--- /dev/null
+++ b/lavis/configs/datasets/discriminatory_reasoning/defaults_mm_image_pc.yaml
@@ -0,0 +1,48 @@
+ # Copyright (c) 2023, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+
+datasets:
+ image_pc_discrn: # name of the dataset builder
+ vis_processor:
+ train:
+ name: "clip_image_train"
+ eval:
+ name: "clip_image_eval"
+ pc_processor:
+ train:
+ name: "ulip_pc"
+ eval:
+ name: "ulip_pc"
+ text_processor:
+ train:
+ name: "blip_caption"
+ eval:
+ name: "blip_caption"
+
+ data_type: [images, pc] # [images|videos|features]
+
+
+ build_info:
+
+ kwargs:
+ total: all
+ shuffle_modalities: False
+ balance_labels: True
+ dataset_name: objaverse
+
+ # Do not put a minus sign (-) in front of a split name below; that would turn the split into a YAML list item.
+ annotations:
+ val:
+ url:
+ - https://storage.googleapis.com/sfr-xinstructblip-data-research/data/discrn/objaverse.json
+ # - /export/home/LAVIS-xgen_mm/lavis/configs/datasets/discriminatory_reasoning/discriminatory_dataset/objaverse_discrn.json
+ storage:
+ - discrn/annotations/objaverse.json
+ # - /export/home/LAVIS-xgen_mm/lavis/configs/datasets/discriminatory_reasoning/discriminatory_dataset/objaverse_discrn.json
+ pc:
+ storage: /export/einstein-vision/3d_vision/objaverse/objaverse_pc_parallel
+
+ images:
+ storage: /export/einstein-vision/3d_vision/objaverse_captions/images/
\ No newline at end of file
diff --git a/lavis/configs/datasets/discriminatory_reasoning/discriminatory_dataset/audiocaps_discrn.json b/lavis/configs/datasets/discriminatory_reasoning/discriminatory_dataset/audiocaps_discrn.json
new file mode 100644
index 000000000..9af592542
--- /dev/null
+++ b/lavis/configs/datasets/discriminatory_reasoning/discriminatory_dataset/audiocaps_discrn.json
@@ -0,0 +1 @@
+[{"captions": ["a person is burping then speaks and laughs", "a toilet flushes and a female speaks"], "sample_ids": ["wAAkbZToh8", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["burp, laugh, speak", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man burps and a woman speaks", "a toilet flushes and a man speaks"], "question": "which entity is a person", "label": 0}, {"captions": ["a dog barks and whimpers", "a stream of water runs briefly"], "sample_ids": ["sShpyu2l4YQ", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["barks, whimpers, dog", "stream, water, run"], "captions_pred_video": ["the puppies are playing with a toy", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a dog is barking and growling", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a man speaks and is typing on a keyboard"], "sample_ids": ["xjhAnI2q6hM", "x9JovgqUcs"], "start_seconds": ["6", "500"], "properties": ["engine revs, vehicle, people", "a, man, speaks, keyboard"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", null], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a man speaks and types on a keyboard"], "question": "which entity is typing on a keyboard?", "label": 1}, {"captions": ["a man woman speak while crickets sing", "people cheer as a vehicle engine revs"], "sample_ids": ["zTLVJCo4WEE", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["a, crickets, sing", "engine revs, vehicle, people"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a truck is revving its engine and a man is speaking "], 
"question": "which entity is more likely to be in a vehicle?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sfAvvZwdLCY", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["water drains, flushes, water", "female, spraying, scream"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["dogs barking and whimpering", "winds blows roughly as a vehicle races past"], "sample_ids": ["tIY7qOV3rEM", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["barking, whimpering, dog", "wind, blows, vehicle"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a jet engine roars and wind blows "], "question": "which entity is more calm", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["yNtRmrn0io8", "sLUnaPT5gM8"], "start_seconds": ["210", "0"], "properties": ["storm, distance, strike", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a house in the middle of the night", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["rain falls and thunder roars", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a woman speaks happily and an animal chirps"], "sample_ids": ["uEU-Hg5MTN8", "uWAAAL4CIoc"], "start_seconds": ["27", "0"], "properties": ["a woman, laughs, animal", "a woman, chirps, 
animal"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a dog is barking "], "question": "which entity has a woman speaking and an animal chirps?", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "a child speaks in closed space"], "sample_ids": ["sncRqQ67iJU", "yW6FWLSLkx4"], "start_seconds": ["460", "40"], "properties": ["loud, repeatedly, man", "child, space, speak"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person is snoring", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "a stream of water runs briefly"], "sample_ids": ["wDVMhEdTiVw", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["gun, shoot, water", "stream, water, run"], "captions_pred_video": ["a blurry image of trees and water in the forest", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["zofjfKhqLk8", "zFjIWfSD-4"], "start_seconds": ["10", "410"], "properties": ["background, metal, clank", "People, motor, brakes"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a 
machine?", "label": 0}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["voJh2gJxXhA", "sLUnaPT5gM8"], "start_seconds": ["50", "0"], "properties": ["music, frog, croak", "loud, laughter, intermittent"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a small engine idles continuously", "an airplane engine spools and people speak"], "sample_ids": ["y5WII6cTH7k", "wTjoRj1se3U"], "start_seconds": ["40", "390"], "properties": ["engine, idle, continuously", "airplane, engine, spool"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["a motorcycle engine is idling", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vZAqdHZ81yA", "uEU-Hg5MTN8"], "start_seconds": ["180", "27"], "properties": ["engine, motorcycle, idling", "a woman, laughs, animal"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["an engine is idling loudly", "a woman is speaking and a baby is crying"], "question": "which entity is not a person?", "label": 0}, {"captions": ["male speech with light ticking", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xO-Q2BlIIPU", "yajyRTUQk3U"], "start_seconds": 
["30", "400"], "properties": ["male, speech, ticking", "a woman, something, fried"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a stream of water runs briefly"], "sample_ids": ["uZesmtKZGSw", "x-PeY8Yb8M4"], "start_seconds": ["250", "300"], "properties": ["men, talk, cars", "stream, water, run"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["birds chirp as a bell rings", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["ziUT9IFTkjg", "zl9Dqx-j7q4"], "start_seconds": ["10", "6"], "properties": ["chirp, bell, ring", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a 
woman speaks over sizzling noise", "soft movement is accompanied by clocks ticking in the background"], "sample_ids": ["yajyRTUQk3U", "vlJS7LN2XyM"], "start_seconds": ["400", "30"], "properties": ["noise, woman, speak", "background, clocks, ticking"], "captions_pred_video": ["- a woman cooking in the kitchen", "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a ticktock of a clock"], "question": "which entity has a quieter background", "label": 1}, {"captions": ["a train horn sounds and railroad crossing ring", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["s7knHCFW82w", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["horn, sound, train", "airplane, boy, fly"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["u--KhUW8l1Y", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["horn, siren, life", "a woman, laughs, animal"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "multiple people speak and children yell 
while water gurgles"], "sample_ids": ["w0xsN8X18Y", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["rain, thunder, surface", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "water flows as men speak and yell"], "sample_ids": ["xKB8O8LTs6s", "vJ7JPEFhyLA"], "start_seconds": ["70", "16"], "properties": ["music, gunshots, explosion", "water, flow, men"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wyllXV6PjKo", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["a kid, talk, cry", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a woman speaks and a baby cries", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a machine beeps continuously"], "sample_ids": ["u2f5NpsoHBg", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["person, laugh, clap", "beeps, machine, continuously"], "captions_pred_video": ["is being projected on a screen at the front of the stage", null], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, 
{"captions": ["a girl speaks followed by a scream and more girls talking", "three men talk while wind blows and some liquid flows"], "sample_ids": ["uYT5gxnyMWM", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["a, scream, girl", "three men, wind, flow"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a girl speaking followed by a scream and more girls talking?", "label": 0}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vfYTJq7nU", "vfYTJq7nU"], "start_seconds": ["130", "130"], "properties": ["rustling, ducks, quack", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a duck quacks and a woman speaks", "a duck quacks and a woman speaks"], "question": "which entity is a video of ducks?", "label": 0}, {"captions": ["a motorcycle engine works nearby", "a horn rings out as a machine runs by"], "sample_ids": ["tOSWIURC-4", "slZLHwNbbt4"], "start_seconds": ["0", "300"], "properties": ["engine, work, nearby", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a lawn mower is running ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "water is sprayed across a hard surface"], "sample_ids": ["soTOh3zYJfY", "sQwlkXjQabo"], "start_seconds": ["40", "10"], "properties": ["vehicle, skid, tires", "water, spray, 
surface"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["un9VQlzgZM", "yajyRTUQk3U"], "start_seconds": ["5", "400"], "properties": ["females, talk, laugh", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["y8dSeubCNI", "uEU-Hg5MTN8"], "start_seconds": ["4", "27"], "properties": ["men, women, car", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["an engine revving and people talking in the background", "a woman is speaking and a baby is crying"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["u--KhUW8l1Y", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["engine, sound, horn", "female, spraying, scream"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a woman is speaking and a baby is crying"], "question": 
"which entity is a person", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["u6jIvCtKarQ", "yajyRTUQk3U"], "start_seconds": ["70", "400"], "properties": ["a, man, speaks", "a woman, something, fried"], "captions_pred_video": ["footage of a person using a blender on a stove top", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a person sniffles and sneezes", "a frog croaks as other frogs croak in the background"], "sample_ids": ["uRlbY6aoBU", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["sneezes, sniffles, person", "background, frog, croak"], "captions_pred_video": [null, "a close up of a frog in the water"], "captions_pred_audio": ["a man is sneezing ", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["zcDwZ6W7E3E", "zFjIWfSD-4"], "start_seconds": ["180", "410"], "properties": ["a, man, speak", "People, motor, brakes"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["weDbePuc-Xc", "zj2R0XoFr5k"], "start_seconds": ["40", "50"], "properties": ["cartoon character, music, 
vocalize", "airplane, boy, fly"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "a clock ticktocks"], "sample_ids": ["ukg5L09Wpvo", "v-g-j2uTByM"], "start_seconds": ["150", "30"], "properties": ["clickety-clack, train, whistle", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a railroad crossing bell rings as a train horn blows"], "sample_ids": ["w0xsN8X18Y", "tZGN5a7ybxo"], "start_seconds": ["30", "60"], "properties": ["music, surface, rain", "ring, train, horn"], "captions_pred_video": [null, "is taken from a moving vehicle on the train tracks"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a train is moving and blowing its horn "], "question": "which entity is a warning", "label": 1}, {"captions": ["an insect buzzes around continuously", "water flows and trickles"], "sample_ids": ["v25l1jef3JY", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["buzzes, continuously, insect", "water, flow, trickle"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "the rocks on the beach are surrounded by water and the sky is visible in the 
background"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "water is splashing and gurgling"], "question": "which entity is not a living thing", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["wqZ135Ssz0", "y2bVZ7rz-5M"], "start_seconds": ["60", "280"], "properties": ["two men, woman, birds", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a beep occurs briefly", "a drill drills through something then people begin laughing"], "sample_ids": ["xtWeJ56-U-g", "tEE3MpBt1sg"], "start_seconds": ["20", "50"], "properties": ["beep, occur, briefly", "drill, something, laugh"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "paper is crumpling consistently"], "sample_ids": ["ugHJF0hfYkg", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["engine, running, continuously", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of the person holding a pair of scissors"], 
"captions_pred_audio": ["a helicopter is flying overhead ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a person whistles a meandering tune", "birds chirp and objects are moved around"], "sample_ids": ["uFoga8sHpiw", "yPUYU6t3rwo"], "start_seconds": ["90", "370"], "properties": ["person, tune, whistle", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a bird in a cage", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a person whistles a song", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["ugHJF0hfYkg", "w5W5Kqtc8E"], "start_seconds": ["10", "100"], "properties": ["loud, intense, propeller", "wind, blow, vehicle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a car speeding up in the distance", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["u0TrcHhkPQ", "zl9Dqx-j7q4"], "start_seconds": ["20", "6"], "properties": ["distance, car, speed", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vqZuVbG6-HI", "ukg5L09Wpvo"], "start_seconds": ["130", "150"], "properties": ["background, male, 
female", "clickety-clack, train, whistle"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["zgUgkpk78xU", "xfaoyyzw2WU"], "start_seconds": ["70", "180"], "properties": ["horn, bells, ring", "loud, jet engine, roar"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["shmR4OZtzqA", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["man, engine, idle", "loud, laughter, intermittent"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 
chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man speaks while a motor runs", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["wind blows and a stream of water flows nearby", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sYITalLZjj4", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["stream, flow, wind", "engine, idle, woman"], "captions_pred_video": ["two ducks are swimming in the water near each other", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["wind blows and birds chirp", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["yajyRTUQk3U", "su6FAOcOA8c"], "start_seconds": ["400", "4"], "properties": ["a woman, something, fried", "engine, idle, woman"], "captions_pred_video": ["- a woman cooking in the kitchen", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a woman is speaking and a subway train is moving "], "question": "which woman is speaking while something is fried?", "label": 0}, {"captions": ["a female speaks softly as paper crinkles", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xvDdE3zNf8Y", "tDVADusiIoc"], "start_seconds": ["120", "60"], "properties": ["a, female, speaks", "water, radio, man"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "a person riding on the back of a sailboat in 
rough seas"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["an engine runs loudly", "a car speeding up in the distance"], "sample_ids": ["vqZuVbG6-HI", "u0TrcHhkPQ"], "start_seconds": ["130", "20"], "properties": ["loud, engine, run", "distance, car, speed"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["an aircraft engine runs", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["yLCORCnd35Q", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["engine, aircraft, runs", "airplane, boy, fly"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "people applaud and hoot and chat quietly"], "sample_ids": ["vBslzh7saPw", "wwyfGO2J4"], "start_seconds": ["90", "90"], "properties": ["engine, roar, louder", "people, applaud, hoot"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "people are clapping and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "water pouring and bubbling"], "sample_ids": ["sOa7g-44Dag", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": 
["audio, scratching, man", "water, bubbles, pouring"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "water is running from a faucet"], "question": "which entity is a video", "label": 1}, {"captions": ["a man talks as several small engines run", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["u9A6VZQCZpU", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["a, man, talk", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaking with light rustling", "a man speaks followed by another man speaking outside"], "sample_ids": ["zOZleIRqZm4", "viuTg1M-dqg"], "start_seconds": ["80", "30"], "properties": ["light, rustling, man", "two men, speak, follow"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of water 
coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one man speaking?", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "a child speaks in closed space"], "sample_ids": ["x5cuQjOdM3E", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["cat, talk, meow", "child, space, speak"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a man talks while metallic objects are rapped and steam is released"], "sample_ids": ["vbZ-0lGPneg", "vuUVPzd2FXw"], "start_seconds": ["30", "160"], "properties": ["a woman, a television program, a bird", "a, steam, release"], "captions_pred_video": ["of a man holding a baby duck in his hands", "of the person cooking on the grill with a spatula"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a man is speaking and dishes are clanging"], "question": "which entity has a man talking?", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "a door slams shut and an object moves on a hard surface"], "sample_ids": ["uC9dtII1KDI", "zkKdxzNC97Y"], "start_seconds": ["150", "27"], "properties": ["wind, gusts, distance", "hard, surface, door"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", 
"a door is opened and closed"], "question": "which entity is not a door?", "label": 0}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "a toilet flushes and a female speaks"], "sample_ids": ["sapQIQUhFc", "yaln9y8I7ms"], "start_seconds": ["280", "230"], "properties": ["liquid, flow, distance", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a toilet flushes and a man speaks"], "question": "which entity is about a toilet?", "label": 1}, {"captions": ["a door opens and closes", "ticking continues without interruption"], "sample_ids": ["vBHyYJ8pL0", "v-g-j2uTByM"], "start_seconds": ["2", "30"], "properties": ["open, close, door", "ticking, continuous, clock"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a clock is ticking loudly"], "question": "which entity is a clock", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a car accelerates and wind blows"], "sample_ids": ["uzQnlJXBbOM", "u0TrcHhkPQ"], "start_seconds": ["50", "20"], "properties": ["ringing, beep, stop", "accelerates, wind, blows"], "captions_pred_video": ["footage of a person using a cell phone on a table", null], "captions_pred_audio": ["a telephone rings and a man speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "an airplane flies overhead as a woman speaks"], "sample_ids": ["vbpKkWvfOu4", "zj2R0XoFr5k"], "start_seconds": ["560", "50"], "properties": ["a, man, speaks", "airplane, fly, overhead"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "water runs into a sink while men speak"], "sample_ids": ["wRBHTgrbiwg", "vzceMbklWc"], "start_seconds": ["50", "180"], "properties": ["bird, owl, speak", "water, sink, run"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "water is running and a man is speaking"], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["electronic beeps occur in a short series", "water flows as men speak and yell"], "sample_ids": ["y682ml90jGw", "vJ7JPEFhyLA"], "start_seconds": ["11", "16"], "properties": ["beeps, series, electronic", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a beeping sound is being made ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more natural", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vs65y4qmyBE", "sLUnaPT5gM8"], "start_seconds": ["340", "0"], "properties": ["wind, blows, strongly", "loud, laughter, intermittent"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a baby is laughing and breathing while a man is speaking "], 
"question": "which entity is more quiet", "label": 0}, {"captions": ["three men talk while wind blows and some liquid flows", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vJ7JPEFhyLA", "y8WEcpOlT3I"], "start_seconds": ["16", "40"], "properties": ["three men, wind, flow", "harsh, wind, blows"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking with wind noise in the background "], "question": "which entity has a harsher wind blowing", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "water flows and trickles"], "sample_ids": ["vimzuGQvdcU", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["a, man, yells", "water, flow, trickle"], "captions_pred_video": ["a group of people are rafting down a river", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "vehicles pass by on a roadway"], "sample_ids": ["w5W5Kqtc8E", "tgbONvsP47Y"], "start_seconds": ["100", "0"], "properties": ["wind, blow, vehicle", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a car is driving on the road "], "question": "which vehicle is moving", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "plastic is tapped on while someone speaks"], "sample_ids": ["soTOh3zYJfY", "wvKpEYswXO0"], "start_seconds": ["40", "150"], "properties": ["vehicle, skid, tires", "plastic, tap, speak"], "captions_pred_video": 
["a red car drifting on a winding road with smoke coming out of it", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["water splashes as an animal walks through", "continuous snoring"], "sample_ids": ["w1ir-sZ3Im8", "sLkeqCDJIyw"], "start_seconds": ["90", "120"], "properties": ["animal, water, splashes", "loud, snoring, noise"], "captions_pred_video": ["footage of a group of people riding horses through a river", ", what is the man doing on the couch? sleeping"], "captions_pred_audio": ["water splashes and gurgles as people speak", "a person is snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a bird is chirping and tweeting a bird song"], "sample_ids": ["vZAw4apG0Es", "wPz6QRAkEb4"], "start_seconds": ["30", "60"], "properties": ["background, tick, repeat", "chirps, tweets, song"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a bird in a cage on top of a pole"], "captions_pred_audio": ["a clock is ticking and people are talking", "birds are chirping in the background "], "question": "which entity is a bird", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a dog barks and whimpers"], "sample_ids": ["xKB8O8LTs6s", "sShpyu2l4YQ"], "start_seconds": ["70", "0"], "properties": ["music, gunshots, explosion", "barks, whimpers, dog"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "the puppies are playing with a toy"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a dog is barking and growling"], "question": "which entity is more calm", "label": 1}, {"captions": ["a person snoring", "an infant 
crying frantically"], "sample_ids": ["t8tv5YRMJUg", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["a person, snore, loud", "cry, infant, frantically"], "captions_pred_video": ["of a man getting his face licked by another man", "of the baby crying in the car seat"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a baby cries loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "an adult man speaks over glass clinking"], "sample_ids": ["vJvryTwuAV8", "u6jIvCtKarQ"], "start_seconds": ["16", "70"], "properties": ["audience, cheer, man", "a, man, speaks"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "footage of a person using a blender on a stove top"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a man is speaking and dishes are being moved with background noise "], "question": "which man speaks over glass clinking", "label": 1}, {"captions": ["birds chirp as a train approaches", "people cheer as a vehicle engine revs"], "sample_ids": ["xM4joTqDVp4", "xjhAnI2q6hM"], "start_seconds": ["160", "6"], "properties": ["bird, chirp, train", "engine revs, vehicle, people"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vJvryTwuAV8", "uEU-Hg5MTN8"], "start_seconds": ["16", "27"], "properties": ["audience, cheer, man", "a woman, laughs, animal"], "captions_pred_video": ["a crowd 
of people are gathered in a mall, watching a man take a selfie in front of the crowd", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a woman is speaking and a baby is crying"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zj2R0XoFr5k", "tiDFTC-5vU"], "start_seconds": ["50", "30"], "properties": ["airplane, boy, fly", "male, duck, laugh"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", null], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["a drill runs and two people laugh", "a person speaks briefly"], "sample_ids": ["tEE3MpBt1sg", "zOZleIRqZm4"], "start_seconds": ["50", "80"], "properties": ["two people, laugh, drill", "person, talk, brief"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking with crickets chirping in the background"], "question": "which entity is talking", "label": 1}, {"captions": ["a person screams glaringly", "an airplane engine roars increasingly louder"], "sample_ids": ["xC8kbrKJmco", "vBslzh7saPw"], "start_seconds": ["0", "90"], "properties": ["glaringly, screams, person", "engine, roar, louder"], "captions_pred_video": [null, "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a goat is bleating ", "a jet engine roars and accelerates "], "question": "which entity is louder", "label": 1}, {"captions": ["a guy speaks with birds chirping 
in the background", "men speak and a nozzle sprays liquid"], "sample_ids": ["v5P-ThUCINM", "wRV8yMk886E"], "start_seconds": ["400", "0"], "properties": ["background, chirp, bird", "liquid, spray, nozzle"], "captions_pred_video": [null, "two cars are parked in a parking lot at night"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a man speaks followed by a loud burst"], "question": "which entity is more likely to be used in a science class", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a man speaks as a car is passing by"], "sample_ids": ["tDVADusiIoc", "sK4u5T8hW78"], "start_seconds": ["60", "30"], "properties": ["water, radio, man", "a, car, pass"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["v5P-ThUCINM", "su6FAOcOA8c"], "start_seconds": ["400", "4"], "properties": ["background, chirp, bird", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman 
speaking?", "label": 1}, {"captions": ["an infant crying frantically", "people applaud and hoot and chat quietly"], "sample_ids": ["zwOBqeFTgiU", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["cry, infant, frantically", "people, applaud, hoot"], "captions_pred_video": ["of the baby crying in the car seat", null], "captions_pred_audio": ["a baby cries loudly", "people are clapping and speaking with background noise "], "question": "which entity is a group of people?", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "food is frying while a woman speaks"], "sample_ids": ["sEprKHm8Sj8", "yhQ2Lg-7qDY"], "start_seconds": ["90", "130"], "properties": ["car, tires, slows", "food, woman, speak"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a faucet is running and a man is speaking"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "a stream of water runs briefly"], "sample_ids": ["xvDdE3zNf8Y", "x-PeY8Yb8M4"], "start_seconds": ["120", "300"], "properties": ["A, crumple, paper", "stream, water, run"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman speaks and crumples paper", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "small dogs yip and bark sharply"], "sample_ids": ["vK93VuO0yNc", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["male 
voice, bus, rumble", "bark, yip, sharply"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "someone snores nearby"], "sample_ids": ["wyllXV6PjKo", "spJCm8tD9Zo"], "start_seconds": ["30", "90"], "properties": ["a baby, a woman, a man", "someone snores, nearby, someone"], "captions_pred_video": [null, "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a woman speaks and a baby cries", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a man talks as several small engines run", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["u9A6VZQCZpU", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["a, man, talk", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a jet engine roars "], "question": "which entity is about a man talking?", "label": 0}, {"captions": ["a vehicle engine runs and someone speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["zF8yoL0rkbI", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["engine, run, someone", "a woman, a television program, a bird"], "captions_pred_video": ["footage of the traffic on the street at night", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program playing?", "label": 1}, 
{"captions": ["a man speaks as a machine runs", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vD6lYD1l0BY", "ukg5L09Wpvo"], "start_seconds": ["330", "150"], "properties": ["a, machine, run", "clickety-clack, train, whistle"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["several insects fly while two men talk", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["s-T9OVOiMLo", "tdWhHV3X25Q"], "start_seconds": ["330", "60"], "properties": ["several, fly, men", "applause, audience, yells"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp and wind blows", "an airplane accelerates briefly"], "sample_ids": ["sxIvBMSavMQ", "zjTG0gaGCUI"], "start_seconds": ["210", "80"], "properties": ["birds, chirp, wind", "accelerates, airplane, briefly"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a 
beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a jet engine roars as wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks and dog vocalizes", "a man speaks on a radio as wind blows"], "sample_ids": ["uWAAAL4CIoc", "tDVADusiIoc"], "start_seconds": ["0", "60"], "properties": ["a, dog, vocalize", "man, radio, blows"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking on a radio?", "label": 1}, {"captions": ["birds vocalize and a man speaks", "water is sprayed across a hard surface"], "sample_ids": ["v0wPrLBI3hg", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["vocalize, bird, speak", "water, spray, surface"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["people speak then an engine runs", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["uMTTDZ2mb4", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["engine, run, people", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is about a vehicle engine?", "label": 1}, {"captions": ["an 
engine sputters followed by a car zooming by", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["u5RmF3c3Aw", "ukg5L09Wpvo"], "start_seconds": ["60", "150"], "properties": ["engine, car, zoom", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man talks as several small engines run", "a man speaks as a motor runs in the background"], "sample_ids": ["u9A6VZQCZpU", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["bees buzz and wind blows", "a piece of wood is being placed down and sawed"], "sample_ids": ["tMJne1a4AFI", "uiItxDsDMFI"], "start_seconds": ["0", "30"], "properties": ["bees buzz, wind blows, bees", "wood, piece, saw"], "captions_pred_video": ["a swarm of bees on the ground", "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["a swarm of bees buzzing around", "a saw is being used with background noise "], "question": "which entity is being cut", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["y8WEcpOlT3I", "vzxHnu-SFEw"], "start_seconds": ["40", "80"], "properties": ["harsh, wind, blows", "two objects, woman, speak"], "captions_pred_video": ["on how to use a sewing machine youtube", "how 
to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "a stream of water runs briefly"], "sample_ids": ["x9JovgqUcs", "x-PeY8Yb8M4"], "start_seconds": ["500", "300"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "small dogs yip and bark sharply"], "sample_ids": ["tDVADusiIoc", "v-wcQf4BDY0"], "start_seconds": ["60", "120"], "properties": ["man, radio, blows", "bark, yip, sharply"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage is blurry and shaky, making it difficult to 
see what is happening"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["an insect buzzes around continuously", "loud clanking and banging with brief male speech"], "sample_ids": ["v25l1jef3JY", "sWZzXuWYY"], "start_seconds": ["0", "420"], "properties": ["buzzes, continuously, insect", "male, speech, banging"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a sewing machine runs and a man speaks"], "question": "which entity is louder", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "small dogs yip and bark sharply"], "sample_ids": ["zdYdyF9-m8U", "v-wcQf4BDY0"], "start_seconds": ["7", "120"], "properties": ["wind, crash, shoreline", "bark, yip, sharply"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["waves crash and wind blows ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "bird squawks are accompanied by a man and woman speaking"], "sample_ids": ["ylpYOorfH4o", "wqZ135Ssz0"], "start_seconds": ["410", "60"], "properties": ["motor, run, steady", "man, woman, squawks"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to 
replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wqZ135Ssz0", "yajyRTUQk3U"], "start_seconds": ["60", "400"], "properties": ["man, woman, squawks", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a man speaks as a car is passing by"], "sample_ids": ["y2bVZ7rz-5M", "sK4u5T8hW78"], "start_seconds": ["280", "30"], "properties": ["motor noise, horn, siren", "a, car, pass"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "birds 
chirp and objects are moved around"], "sample_ids": ["uoGVs9yUqY4", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["multiple, vocalize, wind", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["for how to make a wooden shed door youtube", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["children speak as a female ask them questions", "a telephone rings followed by a woman talking"], "sample_ids": ["wEBlkGWVWwE", "tGcFnX0GHI"], "start_seconds": ["260", "0"], "properties": ["female, speak, questions", "ring, talk, woman"], "captions_pred_video": ["shows a person writing on the whiteboard", null], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "pigeons vocalize and birds chirp"], "sample_ids": ["vzceMbklWc", "uiS58TNyUiw"], "start_seconds": ["180", "430"], "properties": ["water, faucet, sink", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["water is running and a man is speaking", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["paper folding and crinkling", "a frog vocalizes as birds chirp"], "sample_ids": ["zPpG3RD8lSs", "wqUmIEzuNz4"], "start_seconds": ["20", "30"], "properties": ["paper, fold, crinkle", "frog, bird, vocalize"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card 
out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "a frog sitting in the grass on a sunny day"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a cat meows and rustles"], "question": "which entity is not a frog?", "label": 0}, {"captions": ["a dark barks and whimpers", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sYj4hpDUZDQ", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["barks, whimpers, dark", "multiple, people, yell"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", null], "captions_pred_audio": ["a dog barks and a cat meows", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a vehicle engine revs and tires squeal", "a car accelerates and wind blows"], "sample_ids": ["yDoT73BWsdA", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["engine revs, tires squeal, vehicle", "accelerates, wind, blows"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "wind blowing 
followed by a zoom"], "sample_ids": ["wRBHTgrbiwg", "vr8ZXjEBhMQ"], "start_seconds": ["50", "150"], "properties": ["birds, chirp, cooing", "wind, blow, zoom"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom?", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wSVhSdj0F0", "zj2R0XoFr5k"], "start_seconds": ["10", "50"], "properties": ["horn honks, keys jingle, electronic beep", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a man speaks with another voice speaking in the background"], "sample_ids": ["wqZ135Ssz0", "u21-Z5gJCB8"], "start_seconds": ["60", "30"], "properties": ["man, woman, squawks", "background, voice, man"], "captions_pred_video": [null, "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking with another voice speaking in the background?", "label": 1}, {"captions": ["wind blows as people chatter quietly", "water splashes and a door squeaks"], "sample_ids": ["xBxDz0CFVn0", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["wind, chatter, people", "sound, splash, door"], "captions_pred_video": ["footage is blurry 
and out of focus", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a dog barks and taps with background noise "], "question": "which entity is more quiet", "label": 0}, {"captions": ["long loud burping by a man", "a man speaks as a car is passing by"], "sample_ids": ["xmiUIOhtZyQ", "sK4u5T8hW78"], "start_seconds": ["60", "30"], "properties": ["loud, burp, man", "a, car, pass"], "captions_pred_video": ["homer simpson drinking a beer", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person burps and music plays in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "a car speeding up in the distance"], "sample_ids": ["wwyfGO2J4", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["people, applaud, hoot", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a propeller rotates loudly and intensely"], "sample_ids": ["zofjfKhqLk8", "ugHJF0hfYkg"], "start_seconds": ["10", "10"], "properties": ["background, metal, clings", "loud, intense, propeller"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": 
["a large engine is running and a bell is ringing", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "water pouring and bubbling"], "sample_ids": ["yDoT73BWsdA", "uyRfq-jKPpo"], "start_seconds": ["10", "50"], "properties": ["engine revs, tires squeal, vehicle", "water, bubbles, pouring"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a baby cries and a woman speaks", "a stream of water runs briefly"], "sample_ids": ["tMbMDvT50j8", "x-PeY8Yb8M4"], "start_seconds": ["12", "300"], "properties": ["a, cry, woman", "stream, water, run"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a baby cries and a woman speaks", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "a man speaks over a radio as wind blows and water 
splashes"], "sample_ids": ["s6DESzUTGjY", "tDVADusiIoc"], "start_seconds": ["16", "60"], "properties": ["wind, laugh, woman", "water, radio, man"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman laughing?", "label": 0}, {"captions": ["a horn honks and then loudly blares", "a duck quacks continuously"], "sample_ids": ["wnpJndXuxLc", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["horn, honk, loud", "quacks, continuously, duck"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 1}, {"captions": ["people speak in a closed space", "wind blows as people chatter quietly"], "sample_ids": ["sTpirNYo8vQ", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": 
["people, space, speak", "wind, chatter, people"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking with wind noise in the background "], "question": "which entity is in a closed space", "label": 0}, {"captions": ["someone snores nearby", "a woman speaks as she rubs two objects together"], "sample_ids": ["spJCm8tD9Zo", "vzxHnu-SFEw"], "start_seconds": ["90", "80"], "properties": ["someone snores, nearby, someone", "two objects, woman, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "water rushes and then a vehicle zooms past"], "sample_ids": ["wRBHTgrbiwg", "s4Uz1Ffgo04"], "start_seconds": ["50", "100"], "properties": ["bird, owl, speak", "water, rushes, 
vehicle"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is more active", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["zj2R0XoFr5k", "sSMl2vc3ek"], "start_seconds": ["50", "20"], "properties": ["airplane, boy, fly", "loud, multiple, distance"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", null], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "winds blows roughly as a vehicle races past"], "sample_ids": ["ylpYOorfH4o", "xjvTpk2Zpr8"], "start_seconds": ["410", "70"], "properties": ["engine, run, loud", "wind, blows, vehicle"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a jet engine roars and wind blows "], "question": 
"which entity is a vehicle", "label": 1}, {"captions": ["a child speaks in closed space", "a stream of water flows quickly"], "sample_ids": ["yW6FWLSLkx4", "wbHTKEJZyhc"], "start_seconds": ["40", "20"], "properties": ["child, space, speak", "stream, water, flow"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a waterfall is flowing and people are speaking "], "question": "which entity is moving", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wwyfGO2J4", "uYT5gxnyMWM"], "start_seconds": ["90", "50"], "properties": ["people, applaud, hoot", "female, spraying, scream"], "captions_pred_video": [null, 
"footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is more violent", "label": 1}, {"captions": ["a woman speaks with water running", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["wTideSjRFS0", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["water, running, woman", "background, birds, rustling"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "birds are chirping and a chime is ringing "], "question": "which entity has a bird in the background?", "label": 1}, {"captions": ["a train horn sounds as a railroad passing bell rings", "winds blows roughly as a vehicle races past"], "sample_ids": ["zgUgkpk78xU", "xjvTpk2Zpr8"], "start_seconds": ["70", "70"], "properties": ["horn, bell, train", "wind, blows, vehicle"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a helicopter engine idles continuously", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["ugHJF0hfYkg", "tdWhHV3X25Q"], "start_seconds": ["10", "60"], "properties": ["engine, idle, continuously", "applause, audience, yells"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a 
man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking and a crowd is clapping"], "question": "which entity is not a person?", "label": 0}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["xNMovAf3o50", "ziUT9IFTkjg"], "start_seconds": ["0", "10"], "properties": ["rain, thunder, music", "background, birds, rustling"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", null], "captions_pred_audio": ["thunder and rain with music playing in the background ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a church bell rings several times", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sUVVjE3Ucp8", "sLUnaPT5gM8"], "start_seconds": ["0", "0"], "properties": ["ring, bell, several", "loud, laughter, intermittent"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a church bell is ringing ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["wind noise makes sound into a microphone", "a male speaks and another male speaks"], "sample_ids": ["w8uLijTqtlU", "viuTg1M-dqg"], "start_seconds": ["70", "30"], "properties": ["wind, microphone, noise", "two males, speaking, male"], "captions_pred_video": ["footage is blurry and shaky", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a recording", "label": 1}, {"captions": ["an aircraft engine runs as people speak", 
"a girl speaks followed by a scream and more girls talking"], "sample_ids": ["wTjoRj1se3U", "uYT5gxnyMWM"], "start_seconds": ["390", "50"], "properties": ["engine, run, people", "a, scream, girl"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a jet engine is running and people are talking", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a vehicle engine accelerating then running on idle"], "sample_ids": ["xZepNM9qcRA", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["background, motor, run", "engine, accelerate, idle"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "a car speeding up in the distance"], "sample_ids": ["sZvwOuuPGP0", "u0TrcHhkPQ"], "start_seconds": ["50", "20"], "properties": ["engine, diesel, truck", "distance, car, speed"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", null], "captions_pred_audio": ["a medium engine is running ", "a race car accelerates and revs its engine "], "question": "which vehicle is moving faster", "label": 0}, {"captions": ["a vehicle accelerates squealing tires", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sd7xVssqlw", "sSMl2vc3ek"], "start_seconds": ["50", "20"], "properties": ["accelerates, tires, squealing", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a person snoring 
loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["children cry and people talk", "a stream of water flows as people talk and wind blows"], "sample_ids": ["xLwHe825Zs", "xBxDz0CFVn0"], "start_seconds": ["18", "30"], "properties": ["people talk, children cry, people talk", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing as people talk and wind blows?", "label": 1}, {"captions": ["a infant makes noise and is excited", "a man is filing a hard object"], "sample_ids": ["wIJK3-5y0kA", "vveS8HT7Uog"], "start_seconds": ["30", "100"], "properties": ["noise, excited, infant", "a man, hard, object"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "footage is of a workbench with various tools on it including a hammer and a screwdriver"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is filing and speaking with background noise and breathing "], "question": "which object is harder to file", "label": 0}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a stream of water runs briefly"], "sample_ids": ["siJFXfGWgDk", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["man, woman, vehicle", "stream, water, run"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "paper is crumpling consistently"], "sample_ids": ["ylpYOorfH4o", "v5cSxLaHADY"], "start_seconds": ["410", "0"], "properties": ["engine, run, loud", "paper is 
crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and an engine is revving", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a consistent ticking pattern"], "sample_ids": ["wtDqrBygTcU", "sCeWURVHfOM"], "start_seconds": ["30", "30"], "properties": ["man, engine, run", "ticking, pattern, clock"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "- a close-up view of the clock's inner workings"], "captions_pred_audio": ["a man is speaking and a motor is running", "ticking of a clock"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "birds chirp and objects are moved around"], "sample_ids": ["tOj4tdLRaA", "yPUYU6t3rwo"], "start_seconds": ["70", "370"], "properties": ["woman, laugh, baby", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a baby laughs and a woman speaks", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a baby cries and wails as an adult female 
speaks", "a heavy rain falls endlessly"], "sample_ids": ["zliInBdC98Y", "wP8ZKrlx3oA"], "start_seconds": ["30", "40"], "properties": ["a, baby, cries, wails", "heavy, rain, fall"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a baby cries and a woman speaks", "a heavy rain is falling on a surface"], "question": "which entity is falling", "label": 1}, {"captions": ["goats bleat and metal clings", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tH17JPjDPnc", "tdWhHV3X25Q"], "start_seconds": ["260", "60"], "properties": ["bleat, metal, clings", "applause, audience, yells"], "captions_pred_video": ["feed of the goats eating hay in the barn", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "a clock ticks quietly and rhythmically"], "sample_ids": ["slZLHwNbbt4", "u7C-AEBQM"], "start_seconds": ["300", "30"], "properties": ["train, horn, sound", "ticks, rhythmic, quiet"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", null], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a ticktock of a clock"], "question": "which entity is quieter", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "a car accelerates and wind blows"], "sample_ids": ["vSeGhaZt-aI", "u0TrcHhkPQ"], "start_seconds": ["50", "20"], "properties": ["water, bubbles, speak", "accelerates, wind, blows"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background 
noise ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a infant makes noise and is excited", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wIJK3-5y0kA", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["noise, excited, infant", "music, gunfire, explosion"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a baby cries and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a child yells and another yells", "a man speaks as a car is passing by"], "sample_ids": ["vMDHu7Lxcgw", "sK4u5T8hW78"], "start_seconds": ["410", "30"], "properties": ["two, yell, child", "a, car, pass"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["an insect buzzes around continuously", "a toilet flushes and a female speaks"], "sample_ids": ["v25l1jef3JY", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["buzzes, continuously, insect", "female, flushes, toilet"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "footage is blurry and out of focus"], 
"captions_pred_audio": ["a fly is buzzing around a microphone ", "a toilet flushes and a man speaks"], "question": "which entity is not a person", "label": 0}, {"captions": ["an engine idles consistently before sputtering some", "some men converse over an engine running"], "sample_ids": ["rwTERCUno", "sCiy7QS1U"], "start_seconds": ["90", "300"], "properties": ["engine, idle, sputter", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine is idling and vibrating", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaking with light rustling", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["zOZleIRqZm4", "sLUnaPT5gM8"], "start_seconds": ["80", "0"], "properties": ["light, rustling, man", "loud, laughter, intermittent"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["water quietly rushes by while birds chirp in the background", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["sYITalLZjj4", "wSVhSdj0F0"], "start_seconds": ["30", "10"], "properties": ["water, rushes, background, birds", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["two ducks are swimming in the water near each other", null], "captions_pred_audio": ["wind blows and birds chirp", "a car horn honks and keys jangle with background noise "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a vehicle accelerates and squeals tires", "water pouring and bubbling"], "sample_ids": ["yRx9txMcBl0", "uyRfq-jKPpo"], "start_seconds": ["40", "50"], "properties": ["accelerates, 
tires, squeals", "water, bubbles, pouring"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a car is revving its engine and skidding ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "some tunes played by whistling"], "sample_ids": ["uC9dtII1KDI", "u6BnG6YZqJ4"], "start_seconds": ["150", "0"], "properties": ["wind, gusts, distance", "tune, play, whistling"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing 
in the background ", "a person whistling a song"], "question": "which entity is not playing a tune?", "label": 0}, {"captions": ["a dog barks and whimpers", "a frog croaks as other frogs croak in the background"], "sample_ids": ["sShpyu2l4YQ", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["barks, whimpers, dog", "background, frog, croak"], "captions_pred_video": ["the puppies are playing with a toy", "a close up of a frog in the water"], "captions_pred_audio": ["a dog is barking and growling", "a frog is croaking"], "question": "which entity is a frog?", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "some men converse over an engine running"], "sample_ids": ["s6DESzUTGjY", "sCiy7QS1U"], "start_seconds": ["16", "300"], "properties": ["wind, laugh, woman", "men, converse, engine"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", null], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video of a woman laughing?", "label": 0}, {"captions": ["people speak in the background as a clock ticktocks", "pigeons vocalize and birds chirp"], "sample_ids": 
["vZAw4apG0Es", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["background, clock, ticktocks", "vocalize, bird, chirp"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "of the pigeon in the cage"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "a car revs and accelerates loudly and men and women chatter among themselves"], "sample_ids": ["uZesmtKZGSw", "y8dSeubCNI"], "start_seconds": ["250", "4"], "properties": ["car, track, man", "men, women, car"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", null], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "an engine revving and people talking in the background"], "question": "which entity has more people", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "dishes cling together then a man begins to speak"], "sample_ids": ["vKrYfzleLB8", "sQGXqGcwOTc"], "start_seconds": ["110", "3"], "properties": ["a, ring, gunshots", "cling, speak, dishes"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "mechanisms are operating and 
water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "paper folding and crinkling"], "sample_ids": ["wjsXBsc7M40", "zPpG3RD8lSs"], "start_seconds": ["10", "20"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "paper, fold, crinkle"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a baby laughs and a woman speaks", "the wind blows and a mouse clicks "], "question": "which entity is more likely to be a video", "label": 0}, {"captions": ["a man is filing a hard object", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vveS8HT7Uog", "xBxDz0CFVn0"], "start_seconds": ["100", "30"], "properties": ["a man, hard, object", "stream, water, flow"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a man is speaking with wind noise in the background "], "question": 
"which entity is moving", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "some men converse over an engine running"], "sample_ids": ["s59PfAghdkM", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["bird, chirp, background, horse, neigh", "men, converse, engine"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", null], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more likely to be found in a museum", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a woman speaks and other women and a man talk with her"], "sample_ids": ["v0x1odnXtP0", "vbpKkWvfOu4"], "start_seconds": ["210", "560"], "properties": ["keyboard, type, computer", "a, woman, man"], "captions_pred_video": ["how to make money on youtube in spanish", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman is speaking and a man is speaking"], "question": "which is a group of people", "label": 1}, {"captions": ["a child speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yW6FWLSLkx4", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["a, child, speaks", "three men, wind, flow"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": 
"which entity is more active", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "an emergency vehicle engine runs then a horn blows and siren sounds"], "sample_ids": ["shmR4OZtzqA", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["man, engine, idle", "engine, horn, siren"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man speaks while a motor runs", "a truck is honking its horn and a siren is blaring "], "question": "which vehicle has a horn and siren?", "label": 1}, {"captions": ["a small engine idles continuously", "dishes cling together then a man begins to speak"], "sample_ids": ["y5WII6cTH7k", "sQGXqGcwOTc"], "start_seconds": ["40", "3"], "properties": ["engine, idle, continuously", "cling, speak, dishes"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["an engine is knocking and vibrating ", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "running water in a faucet with some clinks"], "sample_ids": ["yaln9y8I7ms", "zNRChLjqcU"], 
"start_seconds": ["230", "220"], "properties": ["female, flushes, toilet", "water, faucet, run"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and a man speaks", "water is running from a faucet into a sink"], "question": "which entity is a faucet?", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "a stream of water runs briefly"], "sample_ids": ["sDSppXIlJrs", "x-PeY8Yb8M4"], "start_seconds": ["27", "300"], "properties": ["microphone, water, wind", "stream, water, run"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["an insect buzzes around continuously", "water is sprayed across a hard surface"], "sample_ids": ["v25l1jef3JY", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["buzzes, continuously, insect", "water, spray, surface"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vZAw4apG0Es", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["background, tick, repeat", "a woman, something, fried"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a clock is ticking and people are talking", "a woman is speaking while food is frying in the background"], "question": "which entity 
is about cooking?", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a person sniffs and sneezes"], "sample_ids": ["yaln9y8I7ms", "uRlbY6aoBU"], "start_seconds": ["230", "0"], "properties": ["female, flushes, toilet", "sneezes, person, sniffs"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and a man speaks", "a man is sneezing "], "question": "which entity is a person", "label": 1}, {"captions": ["a toilet flushes and water drains", "water pouring and bubbling"], "sample_ids": ["sfAvvZwdLCY", "uyRfq-jKPpo"], "start_seconds": ["20", "50"], "properties": ["water drains, flushes, water", "water, bubbles, pouring"], "captions_pred_video": ["footage of the toilet in the bathroom", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a toilet is flushed", "water is running from a faucet"], "question": "which entity is a source of water", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "birds chirp and objects are moved around"], "sample_ids": ["yZmhM1HcsyE", "yPUYU6t3rwo"], "start_seconds": ["4", "370"], "properties": ["engine, roar, water", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a speedboat on a lake with 
water spraying from the back of the boat", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks with water running", "an airplane engine runs"], "sample_ids": ["wTideSjRFS0", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["water, running, woman", "engine, airplane, runs"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "the clinking of a train bell with the humming of an engine and a train horn blowing"], "sample_ids": ["xyL9F5VrjkE", "zgUgkpk78xU"], "start_seconds": ["20", "70"], "properties": ["engine, run, wind", "clinking, humming, horn"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a train blows its horn as it speeds down the tracks "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man speaks as horns blow", "water is sprayed across a hard surface"], "sample_ids": ["tHyNqRyK34A", "sQwlkXjQabo"], "start_seconds": ["24", "10"], "properties": ["a, man, speaks", "water, spray, surface"], "captions_pred_video": ["being taken from inside a 
vehicle on the street at night", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["yRx9txMcBl0", "y2bVZ7rz-5M"], "start_seconds": ["40", "280"], "properties": ["accelerates, tires, squeals", "motor noise, horn, siren"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a warning device", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "people speak as gunfire rings out"], "sample_ids": ["xzKKf9bKNUo", "wqTCwqVRDlk"], "start_seconds": ["10", "80"], "properties": ["background, noise, snoring", "gunfire, ring, speak"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["rain falls heavily on a surface 
and a storm builds in the background with loud thunder", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wP8ZKrlx3oA", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["rain, storm, thunder", "three men, wind, flow"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a weather event", "label": 0}, {"captions": ["a propeller rotates loudly and intensely", "a man speaks over intermittent keyboard taps"], "sample_ids": ["ugHJF0hfYkg", "tw76HGONaKg"], "start_seconds": ["10", "570"], "properties": ["loud, intense, propeller", "audio, man, keyboard"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man speaks and types on a computer keyboard "], "question": "which is quieter", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes 
a woman to gasp and the music plays", "a consistent ticking pattern"], "sample_ids": ["sU53zg9Jp7s", "sCeWURVHfOM"], "start_seconds": ["380", "30"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "ticking, pattern, clock"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "- a close-up view of the clock's inner workings"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "ticking of a clock"], "question": "which entity is a clock?", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["zkKdxzNC97Y", "zl9Dqx-j7q4"], "start_seconds": ["27", "6"], "properties": ["loud, bang, noise", "engine, laugh, loud"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a door is opened and closed", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["electronic beeps occur in a short series", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["y682ml90jGw", "xKB8O8LTs6s"], "start_seconds": ["11", "70"], "properties": ["beeps, series, electronic", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a beeping sound is being made ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "wind blowing followed by a zoom"], "sample_ids": ["tw76HGONaKg", "vr8ZXjEBhMQ"], "start_seconds": ["570", "150"], "properties": ["A, game, keyboard", "wind, blow, zoom"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of 
zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a zoom?", "label": 0}, {"captions": ["two frogs croak at each other", "several insects fly while two men talk"], "sample_ids": ["zg0X6BnhOLQ", "s-T9OVOiMLo"], "start_seconds": ["410", "330"], "properties": ["two frogs, croak, at each other", "several, fly, men"], "captions_pred_video": ["footage of lightning in the sky at night", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a frog is croaking", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "a duck quacks continuously"], "sample_ids": ["se87d6yxEOA", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["run, whistle, pass", "quacks, continuously, duck"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a train is moving 
and blowing its whistle ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "roadway noise occurs and a truck accelerates"], "sample_ids": ["xyL9F5VrjkE", "tgbONvsP47Y"], "start_seconds": ["20", "0"], "properties": ["engine, run, wind", "noise, truck, accelerate"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a fire truck entering a garage"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a car is driving on the road "], "question": "which truck is moving", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["yPUYU6t3rwo", "tiDFTC-5vU"], "start_seconds": ["370", "30"], "properties": ["birds chirp, objects are moved around, birds", "male, duck, laugh"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", null], "captions_pred_audio": ["insects buzz and a man speaks", "a man is speaking and ducks are quacking"], "question": "which entity is about a duck?", "label": 1}, {"captions": ["a child speaks in closed space", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yW6FWLSLkx4", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["child, space, speak", "People, motor, brakes"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", null], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is more likely to be in a closed space", "label": 0}, {"captions": ["a young woman speaks over spraying and another person yells", "a toilet flushes and water drains"], "sample_ids": ["uYT5gxnyMWM", "sfAvvZwdLCY"], "start_seconds": ["50", "20"], "properties": ["person, 
spray, yell", "water drains, flushes, water"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a man speaks as a motor runs in the background"], "sample_ids": ["zofjfKhqLk8", "xZepNM9qcRA"], "start_seconds": ["10", "30"], "properties": ["noise, stop, motor", "background, motor, run"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man speaks while a motorcycle revs and accelerates "], "question": "which motor is running in the background", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "some men converse over an engine running"], "sample_ids": ["vs65y4qmyBE", "sCiy7QS1U"], "start_seconds": ["340", "300"], "properties": ["engine, run, man", "men, converse, engine"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows a man speaking to an engine?", "label": 0}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "small dogs yip and bark sharply"], "sample_ids": ["tDVADusiIoc", "v-wcQf4BDY0"], "start_seconds": ["60", "120"], "properties": ["wind, radio, waves", "bark, yip, sharply"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a dog 
barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["shmR4OZtzqA", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["man, engine, idle", "a woman, a television program, a bird"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man speaks while a motor runs", "a woman is speaking and a dog is whimpering"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["some people speak", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vbZ-0lGPneg", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "airplane, boy, fly"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["a man speaks as 
a motor runs in the background", "some light rustling followed by a loud burp and a girl speaking"], "sample_ids": ["xZepNM9qcRA", "vdoxuJn9lTc"], "start_seconds": ["30", "40"], "properties": ["background, motor, run", "burp, loud, girl"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a child speaks followed by a burp"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a horn blasts as warning bells ring", "a woman speaks happily and an animal chirps"], "sample_ids": ["zgUgkpk78xU", "uWAAAL4CIoc"], "start_seconds": ["70", "0"], "properties": ["horn, bells, ring", "a woman, chirps, animal"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person speaks briefly", "wind blows as people chatter quietly"], "sample_ids": ["zOZleIRqZm4", "xBxDz0CFVn0"], "start_seconds": ["80", "30"], "properties": ["person, talk, brief", "wind, chatter, people"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "an audience gives applause 
as a man yells and a group sings"], "sample_ids": ["xKB8O8LTs6s", "tdWhHV3X25Q"], "start_seconds": ["70", "60"], "properties": ["music, radio, gunshots", "applause, audience, yells"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["yLy-WycbVVE", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["background, people, talk", "harsh, wind, blows"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "pigeons vocalize and birds chirp"], "sample_ids": ["siJFXfGWgDk", "uiS58TNyUiw"], "start_seconds": ["50", "430"], "properties": ["man, woman, vehicle", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["water flows and trickles", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tB7hWb9gTuQ", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["water, flow, trickle", "applause, audience, yells"], "captions_pred_video": ["the 
rocks on the beach are surrounded by water and the sky is visible in the background", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["water is splashing and gurgling", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["white noise and birds chirping", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wRBHTgrbiwg", "ukg5L09Wpvo"], "start_seconds": ["50", "150"], "properties": ["noise, white, chirping", "clickety-clack, train, whistle"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a train blows its whistle and blows its horn "], "question": "which noise is continuous", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "a car accelerates and wind blows"], "sample_ids": ["sZPuqDgX2V0", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["engine, accelerate, intercom", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a race car accelerates and revs its engine "], "question": "which car accelerates", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "wind blows as people chatter quietly"], "sample_ids": ["tK4VlLsNxak", "xBxDz0CFVn0"], "start_seconds": ["120", "30"], "properties": ["a, dial, telephone", "wind, chatter, people"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a 
man speaks with another voice speaking in the background", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["u21-Z5gJCB8", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["background, voice, man", "rooster, crow, background, men"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a person is whistling", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sIXTftIuUgw", "wqZ135Ssz0"], "start_seconds": ["90", "60"], "properties": ["person, whistling, person", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person whistling a song", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["sQGXqGcwOTc", "ukg5L09Wpvo"], "start_seconds": ["3", "150"], "properties": ["audio, kid, giggles", "clickety-clack, train, whistle"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["xO-Q2BlIIPU", "xBxDz0CFVn0"], "start_seconds": ["30", 
"30"], "properties": ["two men, exclamation, speak", "stream, water, flow"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with wind noise in the background "], "question": "which entity is a video of a stream of water flowing as people talk and wind blows?", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "someone whistles a tune"], "sample_ids": ["s3cTDAj31g", "sIXTftIuUgw"], "start_seconds": ["80", "90"], "properties": ["man, talk, woman", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a baby is crying", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "some tunes played by whistling"], "sample_ids": ["w5W5Kqtc8E", "u6BnG6YZqJ4"], "start_seconds": ["100", "0"], "properties": ["water, splashes, motorboat", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a heavy rain falls endlessly", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wP8ZKrlx3oA", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["heavy, rain, fall", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a train blows its whistle 
and blows its horn "], "question": "which entity is continuous", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "someone snores nearby"], "sample_ids": ["uEU-Hg5MTN8", "spJCm8tD9Zo"], "start_seconds": ["27", "90"], "properties": ["a woman, laughs, animal", "someone snores, nearby, someone"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["w8uLijTqtlU", "wqZ135Ssz0"], "start_seconds": ["70", "60"], "properties": ["wind, microphone, noise", "two men, woman, birds"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "a man speaks as a car is passing by"], "sample_ids": ["x5cuQjOdM3E", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["cat, talk, meow", "a, car, pass"], "captions_pred_video": ["a black background with an airplane flying in the sky", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking with 
background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a male speaks and another male speaks", "an airplane engine spools and people speak"], "sample_ids": ["viuTg1M-dqg", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["two males, speaking, male", "airplane, engine, spool"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a jet engine is running and people are talking"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a motorcycle engine is revving while people are speaking", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["y8dSeubCNI", "uEU-Hg5MTN8"], "start_seconds": ["4", "27"], "properties": ["engine revving, people speaking, motorcycle", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["an engine revving and people talking in the background", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["goats bleat and people speak", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["z5iUE5h0EPs", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["goats bleat, people speak, language", "a woman, something, fried"], "captions_pred_video": ["of the goat in the barn", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a goat bleats and a man speaks", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a toilet flushes and water drains", "water flows as men speak and yell"], "sample_ids": ["sfAvvZwdLCY", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], 
"properties": ["water drains, flushes, water", "water, flow, men"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more water", "label": 1}, {"captions": ["paper is crumpling consistently", "plastic is tapped on while someone speaks"], "sample_ids": ["v5cSxLaHADY", "wvKpEYswXO0"], "start_seconds": ["0", "150"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "plastic, tap, speak"], "captions_pred_video": ["footage of the person holding a pair of scissors", "of the person preparing food in the kitchen"], "captions_pred_audio": ["paper is crumpled and crinkled", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is being tapped on", "label": 0}, {"captions": ["an electric engine works nearby followed by a child talking", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["xSKJGCItUWE", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["engine, work, child", "animal, grunts, snorts"], "captions_pred_video": ["footage of the helicopter flying in the room", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["ylpYOorfH4o", "uZesmtKZGSw"], "start_seconds": ["410", "250"], "properties": ["engine, running, wind", "men, talk, cars"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel 
pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about cars?", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a telephone rings followed by a woman talking"], "sample_ids": ["ul60S8TXDA8", "tGcFnX0GHI"], "start_seconds": ["60", "0"], "properties": ["sound, distance, bell", "ring, talk, woman"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", null], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["people speak and tapping occurs", "white noise and snoring with some rustling in the background"], "sample_ids": ["tFCUUGdREgA", "xzKKf9bKNUo"], "start_seconds": ["70", "10"], "properties": ["people, tap, speak", 
"background, noise, snoring"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "shows a woman laying on a bed with her eyes closed and her mouth open"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a person snoring loudly"], "question": "which entity is more quiet", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "a clock ticktocks"], "sample_ids": ["ujMt0-D-x2k", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["snoring, rhythmical, nearby", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of the dog playing with a toy on the floor", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a person is snoring loudly", "a clock is ticking loudly"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "people applaud and hoot and chat quietly"], "sample_ids": ["vddP56-ogds", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["water, splash, person, laugh", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a train horn sounds as it passes by", "a pigeon cooing as an insect buzzes by briefly"], "sample_ids": ["ukg5L09Wpvo", "yZrFNS7GFBQ"], "start_seconds": ["150", "30"], "properties": ["sound, train, horn", "pigeon, buzzes, insect"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "of the bird in the cage"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "an owl hoots in the background "], "question": "which entity is a bird?", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a infant makes noise and is excited"], 
"sample_ids": ["vXlk0lIQBFo", "wIJK3-5y0kA"], "start_seconds": ["470", "30"], "properties": ["wind, speak, vocalize", "noise, excited, infant"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a man speaks as horns blow", "people cheer as a vehicle engine revs"], "sample_ids": ["tHyNqRyK34A", "xjhAnI2q6hM"], "start_seconds": ["24", "6"], "properties": ["a, man, speaks", "engine revs, vehicle, people"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["ukg5L09Wpvo", "yks4cLgIDMc"], "start_seconds": ["150", "170"], "properties": ["a train, a horn, a bell", "background, speaking, child"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background?", "label": 1}, {"captions": ["a door opens and birds chirp", "a man speaks as a motor runs in the background"], "sample_ids": ["yeFvk9x0wWI", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["door, open, birds", "background, motor, run"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "a close-up 
view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["uZesmtKZGSw", "uYT5gxnyMWM"], "start_seconds": ["250", "50"], "properties": ["men, talk, cars", "a, scream, girl"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has more people", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a woman speaks as she rubs two objects together"], "sample_ids": ["yYEVLuqEytU", "vzxHnu-SFEw"], "start_seconds": ["40", "80"], "properties": ["grunt, slurp, background", "two objects, woman, speak"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with 
scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "people applaud and hoot and chat quietly"], "sample_ids": ["zVacuqSb4LI", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["blares, fades, train", "people, applaud, hoot"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", null], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "people are clapping and speaking with background noise 
"], "question": "which entity is more quiet", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a telephone rings followed by a woman talking"], "sample_ids": ["rwtmaKiCcQU", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["nozzle, depressed, spray can", "ring, talk, woman"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", null], "captions_pred_audio": ["spraying and people speaking", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "people cheer as a vehicle engine revs"], "sample_ids": ["wRBHTgrbiwg", "xjhAnI2q6hM"], "start_seconds": ["50", "6"], "properties": ["bird, owl, speak", "engine revs, vehicle, people"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tDlysoZiA1I", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["animal, grunts, chirps", "engine, laugh, loud"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a small engine spits as it runs", "an infant crying as a woman laughs"], "sample_ids": ["sZvwOuuPGP0", "xhmRY9yhC7c"], "start_seconds": ["50", "20"], "properties": ["spits, engine, runs", "a, laugh, infant"], "captions_pred_video": ["of a bulldozer clearing a road in a 
forest stock footage and royalty-free videos", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a medium engine is running ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["paper folding and crinkling", "a man speaks followed by another man speaking outside"], "sample_ids": ["zPpG3RD8lSs", "viuTg1M-dqg"], "start_seconds": ["20", "30"], "properties": ["paper, fold, crinkle", "two men, speak, follow"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wyllXV6PjKo", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["a kid, talk, cry", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman speaks 
and a baby cries", "a man is speaking and a crowd is clapping"], "question": "which entity has more people", "label": 1}, {"captions": ["someone is snoring while sleeping", "wind blowing followed by a zoom"], "sample_ids": ["ujMt0-D-x2k", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["snore, sleep, someone", "wind, blow, zoom"], "captions_pred_video": ["of the dog playing with a toy on the floor", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a person is snoring loudly", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a person", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "water flows as men speak and yell"], "sample_ids": ["vddP56-ogds", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["water, splash, person, laugh", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows water flowing?", "label": 1}, {"captions": ["water flows followed by women screaming", "a infant makes noise and is excited"], "sample_ids": ["w5W5Kqtc8E", "wIJK3-5y0kA"], "start_seconds": ["100", "30"], "properties": ["water, flow, women", "noise, excited, infant"], "captions_pred_video": [null, "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a frog croaks as other frogs croak in the background"], "sample_ids": ["zuua6-5goWw", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["birds, chirp, quiet, man, speaks", "background, frog, croak"], "captions_pred_video": ["in your 
own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a close up of a frog in the water"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a frog is croaking"], "question": "which entity is more quiet", "label": 0}, {"captions": ["wind blows strongly and a young man speaks", "a man speaks as a car is passing by"], "sample_ids": ["vs65y4qmyBE", "sK4u5T8hW78"], "start_seconds": ["340", "30"], "properties": ["wind, blows, strongly", "a, car, pass"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking to a car?", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "roadway noise occurs and a truck accelerates"], "sample_ids": ["xjhAnI2q6hM", "tgbONvsP47Y"], "start_seconds": ["6", "0"], "properties": ["engine revs, vehicle, people", "noise, truck, accelerate"], 
"captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a car is driving on the road "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a girl speaks followed by a scream and more girls talking", "distant men speak as a spray can nozzle is depressed"], "sample_ids": ["uYT5gxnyMWM", "rwtmaKiCcQU"], "start_seconds": ["50", "30"], "properties": ["a, scream, girl", "nozzle, depressed, spray can"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "shows a man spraying paint on a wall with a spray gun"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "spraying and people speaking"], "question": "which entity is about a spray can?", "label": 1}, {"captions": ["a drill runs and two people laugh", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tEE3MpBt1sg", "tiDFTC-5vU"], "start_seconds": ["50", "30"], "properties": ["two people, laugh, drill", "male, duck, laugh"], "captions_pred_video": ["footage is blurry due to the smoke in the air", null], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking and ducks are quacking"], "question": "which entity has more people laughing", "label": 1}, {"captions": ["a motorcycle engine is idling", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vZAqdHZ81yA", "tDVADusiIoc"], "start_seconds": ["180", "60"], "properties": ["engine, motorcycle, idling", "water, radio, man"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["an engine is idling loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is 
not a motorcycle?", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vh30P49Po6s", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["loud, continuous, quacks", "engine, idle, woman"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking and a subway train is moving "], "question": "which entity is quieter", "label": 1}, {"captions": ["a weapon fires multiple times", "a vehicle engine revs and tires squeal"], "sample_ids": ["sMC07Ucy7kg", "yDoT73BWsdA"], "start_seconds": ["10", "10"], "properties": ["weapon, fire, multiple", "engine revs, tires squeal, vehicle"], "captions_pred_video": ["footage is from a car's point of view", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["music plays followed by gunshots and then an explosion", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["xKB8O8LTs6s", "yDoT73BWsdA"], "start_seconds": ["70", "10"], "properties": ["music, gunshots, explosion", "engine, revs, vehicle"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "a car accelerates and wind blows"], "sample_ids": ["wPz6QRAkEb4", "u0TrcHhkPQ"], "start_seconds": ["60", "20"], "properties": ["chirps, tweets, song", 
"accelerates, wind, blows"], "captions_pred_video": ["a bird in a cage on top of a pole", null], "captions_pred_audio": ["birds are chirping in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["people speak then an engine runs", "a toilet flushes and a female speaks"], "sample_ids": ["uMTTDZ2mb4", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["engine, run, people", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["x9JovgqUcs", "uZesmtKZGSw"], "start_seconds": ["500", "250"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a duck quacks continuously"], "sample_ids": 
["xV7Mg1QucSc", "vh30P49Po6s"], "start_seconds": ["14", "30"], "properties": ["alarm, ticktocks, laughs", "quacks, continuously, duck"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["goats bleat and people speak", "water pouring and bubbling"], "sample_ids": ["z5iUE5h0EPs", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["goats bleat, people speak, language", "water, bubbles, pouring"], "captions_pred_video": ["of the goat in the barn", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a goat bleats and a man speaks", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "some tunes played by whistling"], "sample_ids": ["vSeGhaZt-aI", "u6BnG6YZqJ4"], "start_seconds": ["50", "0"], "properties": ["water, bubbles, run", "tune, play, whistling"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is 
speaking and pouring liquid with background noise ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "an infant crying frantically"], "sample_ids": ["xvDdE3zNf8Y", "zwOBqeFTgiU"], "start_seconds": ["120", "30"], "properties": ["A, crumple, paper", "cry, infant, frantically"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "of the baby crying in the car seat"], "captions_pred_audio": ["a woman speaks and crumples paper", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["someone snores nearby", "vehicles pass by on a roadway"], "sample_ids": ["spJCm8tD9Zo", "tgbONvsP47Y"], "start_seconds": ["90", "0"], "properties": ["someone snores, nearby, someone", "pass, vehicle, roadway"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a person is snoring loudly", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a baby cries and a woman moans", "a child speaks"], "sample_ids": ["smDKStoHBJo", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["a, cry, woman", "a, child, speaks"], "captions_pred_video": ["a man holding a crying baby in his arms", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a child?", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["zALy31PjDl0", "sLUnaPT5gM8"], "start_seconds": ["21", "0"], "properties": ["a man, a vehicle, a horn", "loud, laughter, intermittent"], "captions_pred_video": ["a motorcycle is parked on the 
side of a brick walkway", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a propeller rotates loudly and intensely"], "sample_ids": ["y2ZBGpgbhHM", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["birds, tweet, pant", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["birds chirping and a dog panting", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["wEBlkGWVWwE", "vzxHnu-SFEw"], "start_seconds": ["260", "80"], "properties": ["a, babble, woman", "two objects, woman, speak"], "captions_pred_video": ["shows a person writing on the whiteboard", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman 
is speaking and a child is speaking with background noise and clapping ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which woman speaks as she rubs two objects together?", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "a child speaks"], "sample_ids": ["y2ZBGpgbhHM", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["dog, chirp, breathe", "a, child, speaks"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["birds chirping and a dog panting", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["water bubbles and gurgles.", "a car accelerates and wind blows"], "sample_ids": ["tB7hWb9gTuQ", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["bubbles, gurgles, water", "accelerates, wind, blows"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", null], "captions_pred_audio": ["water is splashing and gurgling", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "vehicles pass by on a roadway"], "sample_ids": ["w0xsN8X18Y", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["music, surface, rain", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a car is driving on the road "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["water gurgles, metal squeaks and the water stops", "a car accelerates and wind blows"], "sample_ids": ["x4a9YGIw4ok", "u0TrcHhkPQ"], "start_seconds": ["120", "20"], "properties": ["water, gurgles, stops", "accelerates, wind, 
blows"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and water splashes", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["birds chirp and wind blows", "wind blows as people chatter quietly"], "sample_ids": ["sxIvBMSavMQ", "xBxDz0CFVn0"], "start_seconds": ["210", "30"], "properties": ["birds, chirp, wind", "wind, chatter, people"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["y2bVZ7rz-5M", "ukg5L09Wpvo"], "start_seconds": ["280", "150"], "properties": ["engine, horn, siren", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a train 
blows its whistle and blows its horn "], "question": "which entity is a train", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["sOa7g-44Dag", "t25U-v4k4ts"], "start_seconds": ["30", "40"], "properties": ["background, man, spray", "a, chirps, bird"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a man is speaking and bees are buzzing"], "question": "which entity has a bird chirp?", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vZAw4apG0Es", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["background, clock, ticktocks", "a woman, something, fried"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a clock is ticking and people are talking", "a woman is speaking while food is frying in the background"], "question": "which entity has a clock ticktocking in the background?", "label": 0}, {"captions": ["an adult woman and an adult man speak", "a small engine spits as it runs"], "sample_ids": ["zTLVJCo4WEE", "sZvwOuuPGP0"], "start_seconds": ["30", "50"], "properties": ["two people, adult, speak", "spits, engine, runs"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "of a bulldozer clearing a road in a forest stock footage and royalty-free videos"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a medium engine is running "], "question": "which entity is not a person", "label": 1}, {"captions": ["an insect buzzes around continuously", "a vehicle engine accelerating then running on idle"], "sample_ids": 
["v25l1jef3JY", "vYkA3cfXp5Q"], "start_seconds": ["0", "30"], "properties": ["buzzes, continuously, insect", "engine, accelerate, idle"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "an engine is idling"], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a male speaks over some small clicks", "paper is crumpling consistently"], "sample_ids": ["uXxVebHsGZ8", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["male, clicks, speak", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xKB8O8LTs6s", "tDVADusiIoc"], "start_seconds": ["70", "60"], "properties": ["music, gunfire, explosion", "water, radio, man"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "water flows and trickles"], "sample_ids": ["yZp6xizR0yU", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["animal, bleat, cry", "water, flow, trickle"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "the rocks on the beach are surrounded by water and the sky is visible in the background"], 
"captions_pred_audio": ["a woman is speaking and a goat is bleating", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "a man speaking with light rustling"], "sample_ids": ["wztCSUxOf8", "zOZleIRqZm4"], "start_seconds": ["130", "80"], "properties": ["a crowd, yells, applauds", "light, rustling, man"], "captions_pred_video": [null, "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a man is speaking with crickets chirping in the background"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "water flows as men speak and yell"], "sample_ids": ["yRx9txMcBl0", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["accelerates, tires, squeals", "water, flow, men"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a vehicle?", "label": 0}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], 
"sample_ids": ["wSVhSdj0F0", "vfYTJq7nU"], "start_seconds": ["10", "130"], "properties": ["horn honks, keys jingle, slam", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a duck quacks and a woman speaks"], "question": "which entity is more likely to be heard in a car", "label": 0}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "some men converse over an engine running"], "sample_ids": ["y2bVZ7rz-5M", "sCiy7QS1U"], "start_seconds": ["280", "300"], "properties": ["motor noise, horn, siren", "men, converse, engine"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation between men?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a man speaks as a car is passing by"], "sample_ids": ["sfAvvZwdLCY", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "a, car, pass"], "captions_pred_video": ["footage of the toilet in the bathroom", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["ukxt9I7eMMg", 
"xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["continuous, woman, speaking", "loud, jet engine, roar"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "someone whistles a tune"], "sample_ids": ["zkKdxzNC97Y", "sIXTftIuUgw"], "start_seconds": ["27", "90"], "properties": ["loud, bang, noise", "someone, tune, whistle"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a person whistling a song"], "question": "which entity is softer", "label": 1}, {"captions": ["a dog barks and whimpers", "vehicles pass by on a roadway"], "sample_ids": ["sShpyu2l4YQ", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["barks, whimpers, dog", "pass, vehicle, roadway"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a dog is barking and growling", "a car is driving on the road "], "question": "which entity is more passive", "label": 1}, {"captions": ["someone snores nearby", "some tunes played by whistling"], "sample_ids": ["spJCm8tD9Zo", "u6BnG6YZqJ4"], "start_seconds": ["90", "0"], "properties": ["someone snores, nearby, someone", "tune, play, whistling"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a person is snoring loudly", "a person whistling a song"], "question": "which entity is playing tunes", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "paper is crumpling 
consistently"], "sample_ids": ["vSeGhaZt-aI", "v5cSxLaHADY"], "start_seconds": ["50", "0"], "properties": ["water, bubbles, speak", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "an insect buzzes around continuously"], "sample_ids": ["wRV8yMk886E", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["liquid, spray, nozzle", "buzzes, continuously, insect"], "captions_pred_video": ["two cars are parked in a parking lot at night", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["an insect buzzes around continuously", "a person sniffles and then sneezes in the distance"], "sample_ids": ["v25l1jef3JY", "uRlbY6aoBU"], "start_seconds": ["0", "0"], "properties": ["buzzes, continuously, insect", "a, distance, sneeze"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is sneezing "], "question": "which entity is not a person?", "label": 0}, {"captions": ["rain falls on a surface as men speak and music plays", "an adult male speaks and dials a rotary phone"], "sample_ids": ["w0xsN8X18Y", "tK4VlLsNxak"], "start_seconds": ["30", "120"], "properties": ["music, surface, rain", "An adult male speaks, dials, and speaks into a rotary phone"], "captions_pred_video": [null, "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["a man is speaking while a 
motorboat is moving in the background ", "a man is speaking and using a sewing machine"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["small dogs yip and bark sharply", "vehicles pass by on a roadway"], "sample_ids": ["v-wcQf4BDY0", "tgbONvsP47Y"], "start_seconds": ["120", "0"], "properties": ["bark, yip, sharply", "pass, vehicle, roadway"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a dog barks and growls", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a male speaks and another male speaks", "a car accelerates and wind blows"], "sample_ids": ["viuTg1M-dqg", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["two males, speaking, male", "accelerates, wind, blows"], "captions_pred_video": ["footage of water coming out of a hole in the ground", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "wind blows as people chatter quietly"], "sample_ids": ["vimzuGQvdcU", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["a, man, yells", "wind, chatter, people"], "captions_pred_video": ["a group of people are rafting down a river", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "an infant crying frantically"], "sample_ids": ["wz7N8YRy74I", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, men", "cry, infant, 
frantically"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a toilet flushes and water drains", "someone is typing on a computer keyboard"], "sample_ids": ["sfAvvZwdLCY", "v0x1odnXtP0"], "start_seconds": ["20", "210"], "properties": ["water drains, flushes, water", "keyboard, type, computer"], "captions_pred_video": ["footage of the toilet in the bathroom", "how to make money on youtube in spanish"], "captions_pred_audio": ["a toilet is flushed", "a person is typing on a keyboard"], "question": "which object is used to type on a computer", "label": 1}, {"captions": ["a man speaks uses a drill", "a duck quacks loudly and continuously"], "sample_ids": ["x5eIC7S0fbg", "vh30P49Po6s"], "start_seconds": ["60", "30"], "properties": ["A man is speaking, uses a drill, and is a tool", "loud, continuous, quacks"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and using a power tool ", "a duck is quacking loudly"], "question": "which entity is a tool", "label": 0}, {"captions": ["a small engine idles continuously", "winds blows roughly as a vehicle races past"], "sample_ids": ["y5WII6cTH7k", "xjvTpk2Zpr8"], "start_seconds": ["40", "70"], "properties": ["engine, idle, continuously", "wind, blows, vehicle"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a jet engine roars and wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a 
woman talks while something is fried and objects are tapped"], "sample_ids": ["zuua6-5goWw", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["birds, chirp, quiet, man, speaks", "a woman, something, fried"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a person screams glaringly", "a goat bleats as a person speaks"], "sample_ids": ["xC8kbrKJmco", "tPJvjq9QePY"], "start_seconds": ["0", "40"], "properties": ["glaringly, screams, person", "bleats, person, speak"], "captions_pred_video": [null, "a dog and a sheep in a barn"], "captions_pred_audio": ["a goat is bleating ", "a baby cries and a man speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "some tunes played by whistling"], "sample_ids": ["u7C-AEBQM", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["ticks, rhythmic, quiet", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a ticktock of a clock", "a person whistling a song"], "question": "which entity is not playing a tune?", "label": 0}, 
{"captions": ["a door opens and birds chirp", "winds blows roughly as a vehicle races past"], "sample_ids": ["yeFvk9x0wWI", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["door, open, birds", "wind, blows, vehicle"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["a man is filing a hard object", "a stream of water runs briefly"], "sample_ids": ["vveS8HT7Uog", "x-PeY8Yb8M4"], "start_seconds": ["100", "300"], "properties": ["a man, hard, object", "stream, water, run"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a car is driving on a wet road "], "question": "which entity is not a stream of water?", "label": 0}, {"captions": ["a fly buzzes around loudly as birds chirp", "a kid speaks followed by music playing"], "sample_ids": ["uJV8NDaHqqk", "tQWGZLItBXk"], "start_seconds": ["100", "170"], "properties": ["loud, fly, chirp", "music, kid, speak"], "captions_pred_video": ["a bee hive in a wooden box", "worms revolution screenshots"], "captions_pred_audio": ["a swarm of bees buzzing around", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity is quieter", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "small dogs yip and bark sharply"], "sample_ids": ["u7C-AEBQM", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["ticks, rhythmic, quiet", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is 
happening"], "captions_pred_audio": ["a ticktock of a clock", "a dog barks and growls"], "question": "which entity is louder", "label": 1}, {"captions": ["long loud burping by a man", "a clock ticktocks"], "sample_ids": ["xmiUIOhtZyQ", "v-g-j2uTByM"], "start_seconds": ["60", "30"], "properties": ["loud, burp, man", "ticktocks, clock, ticktocks"], "captions_pred_video": ["homer simpson drinking a beer", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a person burps and music plays in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a duck quacks continuously"], "sample_ids": ["xyL9F5VrjkE", "vh30P49Po6s"], "start_seconds": ["20", "30"], "properties": ["wind, motor, distance", "quacks, continuously, duck"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "small dogs yip and bark sharply"], "sample_ids": ["vddP56-ogds", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["water, flow, laugh", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "some men converse over an engine running"], "sample_ids": ["vimzuGQvdcU", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["a, man, yells", "men, converse, engine"], "captions_pred_video": ["a group of people are rafting down a river", null], 
"captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more quiet", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["zFjIWfSD-4", "su6FAOcOA8c"], "start_seconds": ["410", "4"], "properties": ["People, motor, brakes", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wsHBIgzs9Fs", "tdWhHV3X25Q"], "start_seconds": ["50", "60"], "properties": ["horn, continuous, buzzing", "applause, audience, yells"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a man is speaking and a crowd is clapping"], "question": "which entity is a response to a performance", "label": 1}, {"captions": ["a weapon fires multiple times", "vehicles pass by on a roadway"], "sample_ids": ["sMC07Ucy7kg", "tgbONvsP47Y"], "start_seconds": ["10", "0"], "properties": ["weapon, fire, multiple", "pass, vehicle, roadway"], "captions_pred_video": ["footage is from a car's point of view", "footage of a fire truck entering a garage"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a car is driving on the road "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a man 
speaks while a vehicle engine runs and revs loudly", "a child speaks in closed space"], "sample_ids": ["ylpYOorfH4o", "yW6FWLSLkx4"], "start_seconds": ["410", "40"], "properties": ["engine, run, loud", "child, space, speak"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tOj4tdLRaA", "tdWhHV3X25Q"], "start_seconds": ["70", "60"], "properties": ["woman, laugh, baby", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sAam2NqGhLY", "vYkA3cfXp5Q"], "start_seconds": ["20", "30"], "properties": ["snoring, breathing, child", "engine, accelerate, idle"], "captions_pred_video": ["of a little girl sleeping on a 
couch", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a person is snoring", "an engine is idling"], "question": "which entity is not a person", "label": 1}, {"captions": ["a man speaks as crickets sing", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["ryFDPxgDOGc", "vbZ-0lGPneg"], "start_seconds": ["570", "30"], "properties": ["a, crickets, sing", "a woman, a television program, a bird"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a dog is whimpering"], "question": "which entity has a man speaking as crickets sing?", "label": 0}, {"captions": ["a muffled toilet flushes and the water drains", "race cars go around a track as a man commentates"], "sample_ids": ["sfAvvZwdLCY", "uZesmtKZGSw"], "start_seconds": ["20", "250"], "properties": ["flushes, drains, water", "car, track, man"], "captions_pred_video": ["footage of the toilet in the bathroom", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "an animal growls followed by birds chirping"], "sample_ids": 
["tOj4tdLRaA", "y2ZBGpgbhHM"], "start_seconds": ["70", "30"], "properties": ["woman, laugh, baby", "animal, growl, bird"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "birds chirping and a dog panting"], "question": "which entity is more likely to be a solitary event", "label": 1}, {"captions": ["a woman talking as an infant is crying", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["tMbMDvT50j8", "zFjIWfSD-4"], "start_seconds": ["12", "410"], "properties": ["a, talk, infant", "People, motor, brakes"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a woman talking to an infant?", "label": 0}, {"captions": ["an insect buzzes around continuously", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["v25l1jef3JY", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["buzzes, continuously, insect", "People, motor, brakes"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is not a person?", "label": 0}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["uPDn2BFTHk", "uYT5gxnyMWM"], "start_seconds": ["140", "50"], "properties": ["lady, laugh, baby", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity is more violent", "label": 1}, 
{"captions": ["white noise and snoring with some rustling in the background", "pigeons vocalize and birds chirp"], "sample_ids": ["xzKKf9bKNUo", "uiS58TNyUiw"], "start_seconds": ["10", "430"], "properties": ["background, noise, snoring", "vocalize, bird, chirp"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "of the pigeon in the cage"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["ticking continues without interruption", "people speak as gunfire rings out"], "sample_ids": ["v-g-j2uTByM", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["ticking, continuous, clock", "gunfire, ring, speak"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a clock is ticking loudly", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be interrupted", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a car speeding up in the distance"], "sample_ids": ["sEprKHm8Sj8", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["car, tires, slows", "distance, car, speed"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a race car accelerates and revs its engine "], "question": "which car is speeding up in the distance", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "water flows as men speak and yell"], "sample_ids": ["zkKdxzNC97Y", "vJ7JPEFhyLA"], 
"start_seconds": ["27", "16"], "properties": ["loud, bang, noise", "water, flow, men"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking and yelling?", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "water flows and trickles"], "sample_ids": ["vh30P49Po6s", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["loud, continuous, quacks", "water, flow, trickle"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a duck is quacking loudly", "water is splashing and gurgling"], "question": "which entity is quieter", "label": 1}, {"captions": ["a dog barks and whimpers", "a woman speaks happily and an animal chirps"], "sample_ids": ["sShpyu2l4YQ", "uWAAAL4CIoc"], "start_seconds": ["0", "0"], "properties": ["barks, whimpers, dog", "a woman, chirps, animal"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking and a dog is barking "], "question": "which entity is a bird?", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["uiS58TNyUiw", "wqZ135Ssz0"], "start_seconds": ["430", "60"], "properties": ["vocalize, bird, chirp", "two men, woman, birds"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a video of birds?", "label": 0}, {"captions": ["someone is snoring 
while sleeping", "pigeons vocalize and birds chirp"], "sample_ids": ["ujMt0-D-x2k", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["snore, sleep, someone", "vocalize, bird, chirp"], "captions_pred_video": ["of the dog playing with a toy on the floor", "of the pigeon in the cage"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a horn honks and then loudly blares", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wnpJndXuxLc", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["horn, honk, loud", "three men, wind, flow"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["water splashes and a door squeaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sdXV-ylviw", "zFjIWfSD-4"], "start_seconds": ["190", "410"], "properties": ["sound, splash, door", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dog barks and taps with background noise ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a door that squeaks?", "label": 0}, {"captions": ["a telephone rings followed by a woman talking", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tGcFnX0GHI", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["ring, talk, woman", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a man is speaking and ducks are quacking with wind noise in 
the background "], "question": "which entity has more people talking", "label": 1}, {"captions": ["bees buzz as wind blows", "a man speaks as a car is passing by"], "sample_ids": ["tMJne1a4AFI", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["bees, buzz, wind", "a, car, pass"], "captions_pred_video": ["a swarm of bees on the ground", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "water flows as men speak and yell"], "sample_ids": ["vms5XGTDVQc", "vJ7JPEFhyLA"], "start_seconds": ["220", "16"], "properties": ["paper, crumpled, crinkled", "water, flow, men"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["paper is crumpled and crinkled", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a moving object", "label": 1}, {"captions": ["dogs barking and whimpering", "a man speaks as a car is passing by"], "sample_ids": ["tIY7qOV3rEM", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["barking, whimpering, dog", "a, car, pass"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more passive", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sOa7g-44Dag", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["audio, scratching, man", "male, duck, laugh"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", null], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a man talks as something metal hits against and glass is set down"], "sample_ids": ["zuua6-5goWw", "x6ijhqRY38s"], "start_seconds": ["30", "250"], "properties": ["birds, chirp, quiet, man, speaks", "something metal, glass, hit"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a chef preparing a dish with a bottle of wine and a plate of food on a table"], 
"captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man is speaking and dishes are clanging "], "question": "which entity is about a man talking?", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "a woman speaks in a fast tone with a male"], "sample_ids": ["vKrYfzleLB8", "sTpirNYo8vQ"], "start_seconds": ["110", "30"], "properties": ["a, ring, gunshots", "a, tone, fast"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "of a man taking a selfie on a bus"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a man is speaking while a car is revving and accelerating "], "question": "which entity has a man yell?", "label": 0}, {"captions": ["dogs bark as an engine runs and a person whistles", "a stream of water runs briefly"], "sample_ids": ["zY3icUyMdh8", "x-PeY8Yb8M4"], "start_seconds": ["20", "300"], "properties": ["dog, bark, engine", "stream, water, run"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a female speaks softly as paper crinkles"], "sample_ids": ["xjhAnI2q6hM", "xvDdE3zNf8Y"], "start_seconds": ["6", "120"], "properties": ["engine revs, vehicle, people", "a, female, speaks"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a woman speaks and crumples paper"], "question": "which entity is a person", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "birds 
chirp and objects are moved around"], "sample_ids": ["t8CV69hcvF0", "yPUYU6t3rwo"], "start_seconds": ["210", "370"], "properties": ["person, sneeze, follow", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman sneezes and speaks", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["an engine runs loudly", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vqZuVbG6-HI", "yajyRTUQk3U"], "start_seconds": ["130", "400"], "properties": ["loud, engine, run", "a woman, something, fried"], "captions_pred_video": ["footage is blurry because it's raining outside", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "pigeons vocalize and birds chirp"], "sample_ids": ["s59PfAghdkM", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["bird, chirp, background, horse, neigh", "vocalize, bird, chirp"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "of the pigeon in the cage"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yFB25fqfU8I", "yajyRTUQk3U"], "start_seconds": ["300", "400"], "properties": ["wave, crash, shoreline", "a woman, something, fried"], "captions_pred_video": ["footage of a 
person surfing in the ocean", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of something being fried?", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "several insects fly while two men talk"], "sample_ids": ["vzxHnu-SFEw", "s-T9OVOiMLo"], "start_seconds": ["80", "330"], "properties": ["two objects, woman, speak", "several, fly, men"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a woman speaking as she rubs two objects together?", "label": 0}, {"captions": ["dogs bark as an engine runs and a person whistles", "birds coo incessantly"], "sample_ids": ["zY3icUyMdh8", "yZrFNS7GFBQ"], "start_seconds": ["20", "30"], "properties": ["dog, bark, 
engine", "coo, bird, incessant"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "of the bird in the cage"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "an owl hoots in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a dark barks and whimpers", "water is sprayed across a hard surface"], "sample_ids": ["sYj4hpDUZDQ", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["barks, whimpers, dark", "water, spray, surface"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a dog barks and a cat meows", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["food is frying and sizzles", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["zNRChLjqcU", "w5W5Kqtc8E"], "start_seconds": ["220", "100"], "properties": ["food is frying, sizzles, food", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running from a faucet into a sink", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a duck quacks loudly and continuously", "a woman speaks happily and an animal chirps"], "sample_ids": ["vh30P49Po6s", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["loud, continuous, quacks", "a woman, chirps, animal"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking and a dog is barking "], "question": "which entity is quieter", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yswmmRZFItk", 
"yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["background, frog, croak", "a woman, something, fried"], "captions_pred_video": ["a close up of a frog in the water", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a frog is croaking", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["sU53zg9Jp7s", "uEU-Hg5MTN8"], "start_seconds": ["380", "27"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "a woman, laughs, animal"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking and laughing?", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["s59PfAghdkM", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["bird, chirp, background, horse, neigh", "airplane, boy, fly"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "frogs croak and vocalize"], "sample_ids": ["vb1fPSDI4c", "yswmmRZFItk"], 
"start_seconds": ["30", "0"], "properties": ["multiple, people, yell", "croak, vocalize, frog"], "captions_pred_video": [null, "a close up of a frog in the water"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a frog is croaking"], "question": "which entity is a frog?", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vlS6YMeWAPo", "sSMl2vc3ek"], "start_seconds": ["40", "20"], "properties": ["sheep, baa, birds", "loud, multiple, distance"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", null], "captions_pred_audio": ["a goat bleats and birds chirp", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a woman speaks happily and an animal chirps"], "sample_ids": ["ugHJF0hfYkg", "uWAAAL4CIoc"], "start_seconds": ["10", "0"], "properties": ["loud, intense, propeller", "a woman, chirps, animal"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a dog is barking "], "question": "which entity is quieter", "label": 1}, {"captions": ["an adult woman and an adult man speak", "a man speaks as a car is passing by"], "sample_ids": ["zTLVJCo4WEE", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["two people, adult, speak", "a, car, pass"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["vfYTJq7nU", "w34HjHr6gAY"], "start_seconds": ["130", "30"], "properties": ["rustling, ducks, quack", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a beep sounds followed by a child speaking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "wind blowing followed by a zoom"], "sample_ids": ["y2bVZ7rz-5M", "vr8ZXjEBhMQ"], "start_seconds": ["280", "150"], "properties": ["engine, horn, siren", "wind, blow, zoom"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "people applaud and hoot and chat quietly"], "sample_ids": 
["uiS58TNyUiw", "wwyfGO2J4"], "start_seconds": ["430", "90"], "properties": ["audio, man, speaking", "people, applaud, hoot"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a man speaks then blows a vehicle horn as wind blows"], "sample_ids": ["zofjfKhqLk8", "zALy31PjDl0"], "start_seconds": ["10", "21"], "properties": ["background, metal, clank", "a man, a vehicle, a horn"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a motorcycle is parked on the side of a brick walkway"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and a car horn is honking"], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["uYT5gxnyMWM", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["female, spraying, scream", "a, scream, girl"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a baby is crying"], "question": "which entity has a scream", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "some tunes played by whistling"], "sample_ids": ["rwtmaKiCcQU", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["nozzle, depressed, spray can", "tune, play, whistling"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "a young boy standing in front of a group of kids in a 
classroom"], "captions_pred_audio": ["spraying and people speaking", "a person whistling a song"], "question": "which entity is playing tunes", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["rwtmaKiCcQU", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["nozzle, depressed, spray can", "wind, blow, vehicle"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", null], "captions_pred_audio": ["spraying and people speaking", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["several insects fly while two men talk", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["s-T9OVOiMLo", "xKB8O8LTs6s"], "start_seconds": ["330", "70"], "properties": ["several, fly, men", "music, gunfire, explosion"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["su6FAOcOA8c", "zj2R0XoFr5k"], "start_seconds": ["4", "50"], "properties": ["engine, idle, woman", "airplane, boy, fly"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", 
"label": 1}, {"captions": ["a machine beeps continuously", "a clock ticktocks"], "sample_ids": ["y682ml90jGw", "v-g-j2uTByM"], "start_seconds": ["11", "30"], "properties": ["beeps, machine, continuously", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a beeping sound is being made ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["water splashes as an animal walks through", "a toilet flushes and a female speaks"], "sample_ids": ["w1ir-sZ3Im8", "yaln9y8I7ms"], "start_seconds": ["90", "230"], "properties": ["animal, water, splashes", "female, flushes, toilet"], "captions_pred_video": ["footage of a group of people riding horses through a river", "footage is blurry and out of focus"], "captions_pred_audio": ["water splashes and gurgles as people speak", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "a clock ticktocks briefly"], "sample_ids": ["wPz6QRAkEb4", "u7C-AEBQM"], "start_seconds": ["60", "30"], "properties": ["chirps, tweets, song", "ticktocks, clock, ticktocks briefly"], "captions_pred_video": ["a bird in a cage on top of a pole", null], "captions_pred_audio": ["birds are chirping in the background ", "a ticktock of a clock"], "question": "which entity is silent", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vW4x7S1VfQc", "yajyRTUQk3U"], "start_seconds": ["150", "400"], "properties": ["clacking, oil, woman", "a woman, something, fried"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "- a woman cooking in the kitchen"], "captions_pred_audio": ["food sizzles in a frying pan", "a woman is speaking while food is frying in 
the background"], "question": "which entity is about cooking?", "label": 0}, {"captions": ["a woman speaks and then a man speaks", "someone whistles a tune"], "sample_ids": ["vbpKkWvfOu4", "sIXTftIuUgw"], "start_seconds": ["560", "90"], "properties": ["a, man, speaks", "someone, tune, whistle"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "a stream of water runs briefly"], "sample_ids": ["sapQIQUhFc", "x-PeY8Yb8M4"], "start_seconds": ["280", "300"], "properties": ["water, stream, trickles", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a car is driving on a wet road "], "question": "which stream is running", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["siJFXfGWgDk", "zj2R0XoFr5k"], "start_seconds": ["50", "50"], "properties": ["a, bird, vehicle", "airplane, boy, fly"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a bird flying by?", "label": 0}, {"captions": ["a motorcycle engine is idling", "water splashes as an animal walks 
through"], "sample_ids": ["vZAqdHZ81yA", "w1ir-sZ3Im8"], "start_seconds": ["180", "90"], "properties": ["engine, motorcycle, idling", "animal, water, splashes"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["an engine is idling loudly", "water splashes and gurgles as people speak"], "question": "which entity is not a vehicle", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a vehicles accelerate quickly and someone laughs"], "sample_ids": ["yRx9txMcBl0", "uWPRNLnpy7Y"], "start_seconds": ["40", "10"], "properties": ["motors, tires, screech", "accelerate, laugh, vehicle"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "is taken from a car driving down the street"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["some clanking with distant murmuring", "someone whistles a tune"], "sample_ids": ["uMTTDZ2mb4", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["clanking, murmuring, distant", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a person whistling a song"], "question": "which is a musical 
instrument", "label": 1}, {"captions": ["bees buzz as wind blows", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tMJne1a4AFI", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["bees, buzz, wind", "two men, woman, birds"], "captions_pred_video": ["a swarm of bees on the ground", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a human activity", "label": 1}, {"captions": ["people clap and speak in the distance", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["wwyfGO2J4", "wDVMhEdTiVw"], "start_seconds": ["90", "30"], "properties": ["clap, distance, speak", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["water splashes as an animal walks through", "a man speaks as water trickles down a stream"], "sample_ids": ["w1ir-sZ3Im8", "sapQIQUhFc"], "start_seconds": ["90", "280"], "properties": ["animal, water, splashes", "water, stream, trickles"], "captions_pred_video": ["footage of a group of people riding horses through a river", null], "captions_pred_audio": ["water splashes and gurgles as people speak", "a man is speaking and a stream is flowing in the background "], "question": "which entity is a stream?", "label": 1}, {"captions": ["a woman and man speak while food is frying", "paper is crumpling consistently"], "sample_ids": ["zk-xJGQU8-4", "v5cSxLaHADY"], "start_seconds": ["130", "0"], "properties": ["food, man, woman", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "footage of the person holding a pair of 
scissors"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a woman sneezes then speaks"], "sample_ids": ["zj2R0XoFr5k", "x4dZyf9Gbj0"], "start_seconds": ["50", "130"], "properties": ["airplane, boy, fly", "sneezes, speaks, woman"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman sneezes and speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["xV7Mg1QucSc", "xfaoyyzw2WU"], "start_seconds": ["14", "180"], "properties": ["alarm, ticktocks, laughs", "loud, jet engine, roar"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tDVADusiIoc", "vYkA3cfXp5Q"], "start_seconds": ["60", "30"], "properties": ["wind, radio, waves", "engine, accelerate, idle"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["the 
clinking of a train bell with the humming of an engine and a train horn blowing", "vehicles pass by on a roadway"], "sample_ids": ["zgUgkpk78xU", "tgbONvsP47Y"], "start_seconds": ["70", "0"], "properties": ["clinking, humming, horn", "pass, vehicle, roadway"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "small dogs yip and bark sharply"], "sample_ids": ["sfAvvZwdLCY", "v-wcQf4BDY0"], "start_seconds": ["20", "120"], "properties": ["flushes, drains, water", "bark, yip, sharply"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a toilet is flushed", "a dog barks and growls"], "question": "which entity is louder", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a person speaks briefly"], "sample_ids": ["sWZzXuWYY", "zOZleIRqZm4"], "start_seconds": ["420", "80"], "properties": ["male, speech, banging", "person, talk, brief"], "captions_pred_video": [null, "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man is speaking with crickets chirping in the background"], "question": "which entity is talking", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tw76HGONaKg", "zl9Dqx-j7q4"], 
"start_seconds": ["570", "6"], "properties": ["A, game, keyboard", "engine, laugh, loud"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "a woman speaks with water running"], "sample_ids": ["vb1fPSDI4c", "wTideSjRFS0"], "start_seconds": ["30", "30"], "properties": ["multiple, people, yell", "water, running, woman"], "captions_pred_video": [null, "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a woman is speaking while water is running in the background"], "question": "which entity has more people speaking", "label": 0}, {"captions": ["multiple birds chirp and an animal grunts", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tDlysoZiA1I", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["animal, grunt, multiple", "airplane, boy, fly"], "captions_pred_video": 
["'s main subject is a dog standing on top of a laptop", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "an infant crying as a woman laughs"], "sample_ids": ["xSKJGCItUWE", "xhmRY9yhC7c"], "start_seconds": ["10", "20"], "properties": ["engine, run, boy", "a, laugh, infant"], "captions_pred_video": ["footage of the helicopter flying in the room", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["vddP56-ogds", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["liquid, laughs, man", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn honking?", "label": 1}, {"captions": ["goats bleat and metal clings", "three men talk while wind blows and some liquid flows"], "sample_ids": ["tH17JPjDPnc", "vJ7JPEFhyLA"], "start_seconds": ["260", "16"], "properties": ["bleat, metal, clings", "three men, wind, flow"], "captions_pred_video": ["feed of the goats eating hay in the barn", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a liquid flowing?", 
"label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["wSVhSdj0F0", "ziUT9IFTkjg"], "start_seconds": ["10", "10"], "properties": ["horn honks, keys jingle, slam", "background, birds, rustling"], "captions_pred_video": [null, null], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "birds are chirping and a chime is ringing "], "question": "which entity is more natural", "label": 1}, {"captions": ["a baby cries and a woman speaks", "a man is filing a hard object"], "sample_ids": ["tMbMDvT50j8", "vveS8HT7Uog"], "start_seconds": ["12", "100"], "properties": ["a, cry, woman", "a man, hard, object"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage is of a workbench with various tools on it including a hammer and a screwdriver"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is filing and speaking with background noise and breathing "], "question": "which object is harder to file", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "vehicle engines race around a track as a man commentates"], "sample_ids": ["vzxHnu-SFEw", "sZPuqDgX2V0"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "commentator, race, track"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking and a helicopter is flying overhead "], "question": "which is a video", "label": 1}, {"captions": ["some clanking with distant murmuring", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["uMTTDZ2mb4", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["clanking, murmuring, distant", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video", "label": 1}, {"captions": ["small dogs yip and bark sharply", "water is sprayed across a hard surface"], "sample_ids": ["v-wcQf4BDY0", "sQwlkXjQabo"], "start_seconds": ["120", "10"], "properties": ["bark, yip, sharply", "water, spray, surface"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a dog barks and growls", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "a woman speaks and other women and a man talk with her"], "sample_ids": ["vveS8HT7Uog", "vbpKkWvfOu4"], "start_seconds": ["100", "560"], "properties": ["a man, objects, speak", "a, woman, man"], "captions_pred_video": ["footage is of a workbench with various tools on it 
including a hammer and a screwdriver", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vcmWSmvti8", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["music, man, fire", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a man is speaking with wind noise in the background "], "question": "which entity has a man speaking as music plays before artillery is fired?", "label": 0}, {"captions": ["loud clanking and banging with brief male speech", "a duck quacks continuously"], "sample_ids": ["sWZzXuWYY", "vh30P49Po6s"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a consistent ticking pattern", "an airplane engine spools and people speak"], "sample_ids": ["sCeWURVHfOM", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["ticking, pattern, clock", "airplane, engine, spool"], "captions_pred_video": ["- a close-up view of the clock's inner workings", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["ticking of a clock", "a jet 
engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["an aircraft engine runs", "a horse runs while two women talk"], "sample_ids": ["yLCORCnd35Q", "sdvI1mHAsc"], "start_seconds": ["0", "20"], "properties": ["engine, aircraft, runs", "two women, horse, run"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", null], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "horses clip-clop and a woman speaks"], "question": "which entity is moving", "label": 1}, {"captions": ["a motorcycle engine is idling", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["vZAqdHZ81yA", "yks4cLgIDMc"], "start_seconds": ["180", "170"], "properties": ["engine, motorcycle, idling", "background, speaking, child"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["an engine is idling loudly", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "paper is crumpling consistently"], "sample_ids": ["zkKdxzNC97Y", "v5cSxLaHADY"], "start_seconds": ["27", "0"], "properties": ["hard, surface, door", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a door is opened and closed", "paper is crumpled and crinkled"], "question": "which object is crumpling", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "small dogs growl, bark and yip."], "sample_ids": ["zdYdyF9-m8U", "sShpyu2l4YQ"], "start_seconds": ["7", "0"], "properties": ["wind, crash, shoreline", "growl, bark, yip"], 
"captions_pred_video": ["a person kayaking in the ocean near a cliff", "the puppies are playing with a toy"], "captions_pred_audio": ["waves crash and wind blows ", "a dog is barking and growling"], "question": "which entity is more active", "label": 1}, {"captions": ["a mechanical buzzing getting louder", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sEprKHm8Sj8", "xBxDz0CFVn0"], "start_seconds": ["90", "30"], "properties": ["noise, loud, buzzing", "stream, water, flow"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage is blurry and out of focus"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking with wind noise in the background "], "question": "which entity is a source of noise", "label": 0}, {"captions": ["a man speaking with light rustling", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["zOZleIRqZm4", "y8WEcpOlT3I"], "start_seconds": ["80", "40"], "properties": ["light, rustling, man", "harsh, wind, blows"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 0}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vfYTJq7nU", "tDVADusiIoc"], "start_seconds": ["130", "60"], "properties": ["rustling, ducks, quack", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of 
a sailboat in rough seas"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "a child speaks in closed space"], "sample_ids": ["vveS8HT7Uog", "yW6FWLSLkx4"], "start_seconds": ["100", "40"], "properties": ["a man, objects, speak", "child, space, speak"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["an audience gives applause", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["x6iCUDmRpKQ", "wDVMhEdTiVw"], "start_seconds": ["38", "30"], "properties": ["applause, audience, give", "gun, shoot, water"], "captions_pred_video": ["a black background with the moon and stars in the sky", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a group of people are clapping and cheering", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["zkKdxzNC97Y", "ziUT9IFTkjg"], "start_seconds": ["27", "10"], "properties": ["loud, bang, noise", "background, birds, rustling"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "birds are chirping and a chime is ringing "], "question": 
"which entity is more quiet", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a clock ticktocks"], "sample_ids": ["ugHJF0hfYkg", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["loud, intense, propeller", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a helicopter is flying overhead ", "a clock is ticking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a stream of water flows as people talk and wind blows"], "sample_ids": ["spYNpeN7rPY", "xBxDz0CFVn0"], "start_seconds": ["1", "30"], "properties": ["a clock, ticktock, man", "stream, water, flow"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking 
with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "water splashes as an animal walks through"], "sample_ids": ["tOj4tdLRaA", "w1ir-sZ3Im8"], "start_seconds": ["70", "90"], "properties": ["woman, laugh, baby", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a baby laughs and a woman speaks", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a cat meows as a young woman speaks"], "sample_ids": ["vSeGhaZt-aI", "x5cuQjOdM3E"], "start_seconds": ["50", "30"], "properties": ["water, sink, talk", "cat, meows, young woman"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a cat meows and a woman speaks"], "question": "which entity is a cat?", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "a man speaks as a car is passing by"], "sample_ids": ["ylpYOorfH4o", "sK4u5T8hW78"], "start_seconds": ["410", "30"], "properties": ["engine, running, wind", "a, car, pass"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the 
fuel pump on a 1999 dodge ram 15", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking over a running engine and blowing wind?", "label": 0}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "dishes cling together then a man begins to speak"], "sample_ids": ["vfYTJq7nU", "sQGXqGcwOTc"], "start_seconds": ["130", "3"], "properties": ["rustling, ducks, quack", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a duck quacks and a woman speaks", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["food is frying then a woman speaks", "water pouring and bubbling"], "sample_ids": ["ukxt9I7eMMg", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["food, woman, speak", "water, bubbles, pouring"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro 
puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["w5W5Kqtc8E", "uZesmtKZGSw"], "start_seconds": ["100", "250"], "properties": ["wind, blow, vehicle", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["un9VQlzgZM", "zj2R0XoFr5k"], "start_seconds": ["5", "50"], "properties": ["females, talk, laugh", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], 
"captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["xM4joTqDVp4", "w34HjHr6gAY"], "start_seconds": ["160", "30"], "properties": ["background, chirp, birds", "beeps, hit, woman"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "an engine runs loudly"], "sample_ids": ["sncRqQ67iJU", "vqZuVbG6-HI"], "start_seconds": ["460", "130"], "properties": ["loud, repeatedly, man", "loud, engine, run"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a person is snoring", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["v7jJS8aAyA", "zj2R0XoFr5k"], 
"start_seconds": ["10", "50"], "properties": ["wind, blows, loudly", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a person is burping while a girl speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["vdoxuJn9lTc", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["person, burp, girl", "background, motor, run"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a child speaks followed by a burp", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a church bell rings several times", "people applaud and hoot and chat quietly"], "sample_ids": ["sUVVjE3Ucp8", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["ring, bell, several", "people, applaud, hoot"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", null], "captions_pred_audio": ["a church bell is ringing ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a stream of water flows quickly"], "sample_ids": ["tEE3MpBt1sg", "wbHTKEJZyhc"], "start_seconds": ["50", "20"], "properties": ["drill, something, laugh", "stream, water, flow"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video 
footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a waterfall is flowing and people are speaking "], "question": "which entity is moving", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a man speaks as a car is passing by"], "sample_ids": ["vfYTJq7nU", "sK4u5T8hW78"], "start_seconds": ["130", "30"], "properties": ["rustling, ducks, quack", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a 
man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zj2R0XoFr5k", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["airplane, fly, overhead", "a woman, laughs, animal"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a woman speaking?", "label": 0}, {"captions": ["a muffled toilet flushes and the water drains", "a horse runs while two women talk"], "sample_ids": ["sfAvvZwdLCY", "sdvI1mHAsc"], "start_seconds": ["20", "20"], "properties": ["flushes, drains, water", "two women, horse, run"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "horses clip-clop and a woman speaks"], "question": "which entity is a horse?", "label": 1}, {"captions": ["birds chirp and a pop occurs before a man speaks", "a man speaks over intermittent keyboard taps"], "sample_ids": ["zuua6-5goWw", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["sound, pop, bird", "audio, man, keyboard"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "loud clanking and banging with brief male speech"], "sample_ids": ["tDlysoZiA1I", "sWZzXuWYY"], "start_seconds": ["0", "420"], "properties": ["animal, grunts, chirps", "male, speech, banging"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", null], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a sewing machine runs and a man speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["two women and a man talk while a kid cries", "winds blows roughly as a vehicle races past"], "sample_ids": ["wyllXV6PjKo", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["a kid, talk, cry", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], 
"captions_pred_audio": ["a woman speaks and a baby cries", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a power tool runs and touches a surface"], "sample_ids": ["vbZ-0lGPneg", "zfvPRf3chY"], "start_seconds": ["30", "290"], "properties": ["a woman, a television program, a bird", "power tool, run, touch"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a man is speaking while a power tool is being used "], "question": "which entity is touching a surface", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "someone whistles a tune"], "sample_ids": ["xO-Q2BlIIPU", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["two men, exclamation, speak", "someone, tune, whistle"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["an adult woman and an adult man speak", "a person sneezes followed by another person speaking"], "sample_ids": ["zTLVJCo4WEE", "t8CV69hcvF0"], "start_seconds": ["30", "210"], "properties": ["two people, adult, speak", "person, sneeze, follow"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a woman sneezes and speaks"], "question": "which entity shows two people speaking?", "label": 0}, {"captions": ["water splashes and a door squeaks", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["sdXV-ylviw", "wSVhSdj0F0"], 
"start_seconds": ["190", "10"], "properties": ["sound, splash, door", "horn honks, keys jingle, electronic beep"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dog barks and taps with background noise ", "a car horn honks and keys jangle with background noise "], "question": "which entity has a door?", "label": 0}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "vehicles pass by on a roadway"], "sample_ids": ["zALy31PjDl0", "tgbONvsP47Y"], "start_seconds": ["21", "0"], "properties": ["a man, a vehicle, a horn", "pass, vehicle, roadway"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a car is driving on the road "], "question": "which entity is about vehicles", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sapQIQUhFc", "xBxDz0CFVn0"], "start_seconds": ["280", "30"], "properties": ["water, trickles, flow", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man is speaking with wind noise in the background "], "question": "which entity has more water flowing", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["wSVhSdj0F0", "y2bVZ7rz-5M"], "start_seconds": ["10", "280"], "properties": ["horn honks, keys jingle, electronic beep", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn 
honks twice and keys jingle", "label": 0}, {"captions": ["sirens ring and approach with humming of distant traffic", "a piece of wood is being placed down and sawed"], "sample_ids": ["xERFUeZONz8", "uiItxDsDMFI"], "start_seconds": ["0", "30"], "properties": ["ring, approach, traffic", "wood, piece, saw"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["an emergency vehicle siren blares", "a saw is being used with background noise "], "question": "which entity is being cut", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["rqu8iB22IY", "vbZ-0lGPneg"], "start_seconds": ["5", "30"], "properties": ["sound, repeats, laugh", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "people speak in the background as a clock ticktocks"], "sample_ids": ["ukxt9I7eMMg", "vZAw4apG0Es"], "start_seconds": ["30", "30"], "properties": ["food, pan, cook", "background, clock, ticktocks"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a clock is ticking and people are talking"], "question": "which entity is a video of a person cooking?", "label": 0}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "vehicles pass by on a roadway"], "sample_ids": ["yI-KvObbDoY", "tgbONvsP47Y"], "start_seconds": ["260", "0"], 
"properties": ["sound, smack, wind", "pass, vehicle, roadway"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "footage of a fire truck entering a garage"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a clock ticktocks in wind", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yVumC9TGknc", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, wind", "a woman, a television program, a bird"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a series of beeps and chirps", "a woman is speaking and a dog is whimpering"], "question": "which entity is a clock?", "label": 0}, {"captions": ["water gurgles, metal squeaks and the water stops", "race cars go around a track as a man commentates"], "sample_ids": ["x4a9YGIw4ok", "uZesmtKZGSw"], "start_seconds": ["120", "250"], "properties": ["water, gurgles, stops", "car, track, man"], "captions_pred_video": ["footage is blurry and out of focus", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a toilet flushes and water splashes", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is a video of a 
race?", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "someone whistles a tune"], "sample_ids": ["xERFUeZONz8", "sIXTftIuUgw"], "start_seconds": ["0", "90"], "properties": ["ring, approach, traffic", "someone, tune, whistle"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", null], "captions_pred_audio": ["an emergency vehicle siren blares", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "wind blows as people chatter quietly"], "sample_ids": ["tDlysoZiA1I", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["animal, grunt, multiple", "wind, chatter, people"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["water running down a sink while a man is talking", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vSeGhaZt-aI", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["water, sink, talk", "three men, wind, flow"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man talking?", "label": 0}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["vlJS7LN2XyM", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["background, clocks, ticking", "background, birds, 
rustling"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "birds are chirping and a chime is ringing "], "question": "which entity has a bird in the background?", "label": 1}, {"captions": ["a person is snoring while sleeping", "a man speaks as a motor runs in the background"], "sample_ids": ["vJrjSeP17yE", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["a person is sleeping, snoring, person", "background, motor, run"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a person snoring loudly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 0}, {"captions": ["birds vocalize and chirp continuously", "a child speaks in closed space"], "sample_ids": ["w1mlz3Pe4fU", "yW6FWLSLkx4"], "start_seconds": ["300", "40"], "properties": ["vocalize, chirp, continuously", "child, space, speak"], "captions_pred_video": ["of a bird in a cage", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["birds are chirping and singing", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["xjvTpk2Zpr8", "vbZ-0lGPneg"], "start_seconds": ["70", "30"], "properties": ["engine, run, wind", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a woman is speaking and a dog is 
whimpering"], "question": "which entity is a television program?", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["w2JXXIAdUdg", "xKB8O8LTs6s"], "start_seconds": ["10", "70"], "properties": ["snoring, distance, person", "music, gunfire, explosion"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a person snoring and a dog whimpering", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "water splashes as an animal walks through"], "sample_ids": ["s6DESzUTGjY", "w1ir-sZ3Im8"], "start_seconds": ["16", "90"], "properties": ["wind, laugh, woman", "animal, water, splashes"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "water splashes and gurgles as people 
speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a helicopter engine runs continuously"], "sample_ids": ["su6FAOcOA8c", "ugHJF0hfYkg"], "start_seconds": ["4", "10"], "properties": ["engine, run, woman", "engine, running, continuously"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a helicopter is flying overhead "], "question": "which entity has an engine running continuously", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["wTjoRj1se3U", "tw76HGONaKg"], "start_seconds": ["390", "570"], "properties": ["engine, run, people", "A, game, keyboard"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a jet engine is running and people are talking", "a man speaks and types on a computer keyboard "], "question": "which entity is a video game?", 
"label": 1}, {"captions": ["a car speeding up in the distance", "a frog croaks as other frogs croak in the background"], "sample_ids": ["u0TrcHhkPQ", "yswmmRZFItk"], "start_seconds": ["20", "0"], "properties": ["distance, car, speed", "background, frog, croak"], "captions_pred_video": [null, "a close up of a frog in the water"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a frog is croaking"], "question": "which entity is a croaking animal?", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "wind blowing followed by a zoom"], "sample_ids": ["vbZ-0lGPneg", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["a woman, a television program, a bird", "wind, blow, zoom"], "captions_pred_video": ["of a man holding a baby duck in his hands", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["wztCSUxOf8", "vlS6YMeWAPo"], "start_seconds": ["130", "40"], "properties": ["a crowd, yells, applauds", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "an engine runs loudly"], "sample_ids": ["yZp6xizR0yU", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["animal, bleat, cry", "loud, engine, run"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a lawn 
mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["two frogs croak at each other", "a frog croaks as other frogs croak in the background"], "sample_ids": ["zg0X6BnhOLQ", "yswmmRZFItk"], "start_seconds": ["410", "0"], "properties": ["two frogs, croak, at each other", "background, frog, croak"], "captions_pred_video": ["footage of lightning in the sky at night", "a close up of a frog in the water"], "captions_pred_audio": ["a frog is croaking", "a frog is croaking"], "question": "which frog is croaking", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "dogs bark as an engine runs and a person whistles"], "sample_ids": ["x5cuQjOdM3E", "zY3icUyMdh8"], "start_seconds": ["30", "20"], "properties": ["cat, meows, young woman", "dog, bark, engine"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a cat meows and a woman speaks", "a car is driving and dogs are barking and squealing "], "question": "which entity is more calm", "label": 0}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "birds vocalize and chirp continuously"], "sample_ids": ["xKB8O8LTs6s", "w1mlz3Pe4fU"], "start_seconds": ["70", "300"], "properties": ["music, gunfire, explosion", "vocalize, chirp, continuously"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "of a bird in a cage"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "birds are chirping and singing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a train engine runs and a horn blows", "wind blowing and birds chirping with the distant cooing of a large bird"], "sample_ids": ["zPX9o1uDiI", "wRBHTgrbiwg"], "start_seconds": ["40", "50"], "properties": ["engine, horn, run", "birds, chirp, cooing"], 
"captions_pred_video": [null, "of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "birds are chirping and insects are buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["rqfQRErjfk8", "vlS6YMeWAPo"], "start_seconds": ["170", "40"], "properties": ["crowd, cheers, applauds", "sheep, baa, birds"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["siJFXfGWgDk", "tDVADusiIoc"], "start_seconds": ["50", "60"], "properties": ["man, woman, vehicle", "water, radio, man"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "water is sprayed across a hard surface"], "sample_ids": ["s7knHCFW82w", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["blow horn, get close, train", "water, spray, surface"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", 
"spraying followed by silence"], "question": "which is a liquid", "label": 1}, {"captions": ["a dark barks and whimpers", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sYj4hpDUZDQ", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["barks, whimpers, dark", "loud, laughter, intermittent"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a dog barks and a cat meows", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a drill runs and two people laugh"], "sample_ids": ["vBHyYJ8pL0", "tEE3MpBt1sg"], "start_seconds": ["2", "50"], "properties": ["noise, door, opening", "two people, laugh, drill"], "captions_pred_video": [null, "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["an airplane engine runs", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["yVPZ2MNWpms", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["engine, airplane, runs", "two men, woman, birds"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", null], "captions_pred_audio": ["a car is driving by on the road ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a human activity", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "paper is crumpling consistently"], "sample_ids": ["wyllXV6PjKo", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["a baby, a woman, a man", "paper is crumpling, 
paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman speaks and a baby cries", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["two frogs croak at each other", "someone whistles a tune"], "sample_ids": ["zg0X6BnhOLQ", "sIXTftIuUgw"], "start_seconds": ["410", "90"], "properties": ["two frogs, croak, at each other", "someone, tune, whistle"], "captions_pred_video": ["footage of lightning in the sky at night", null], "captions_pred_audio": ["a frog is croaking", "a person whistling a song"], "question": "which entity is a human", "label": 1}, {"captions": ["a baby cries and a woman speaks", "water flows as men speak and yell"], "sample_ids": ["tMbMDvT50j8", "vJ7JPEFhyLA"], "start_seconds": ["12", "16"], "properties": ["a, cry, woman", "water, flow, men"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a baby crying and a woman speaking?", "label": 0}, {"captions": ["birds chirp and wind blows", "small dogs growl, bark and yip."], "sample_ids": ["sxIvBMSavMQ", "sShpyu2l4YQ"], "start_seconds": ["210", "0"], "properties": ["birds, chirp, wind", "growl, bark, yip"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive 
beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "the puppies are playing with a toy"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a dog is barking and growling"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "water is sprayed across a hard surface"], "sample_ids": ["sapQIQUhFc", "sQwlkXjQabo"], "start_seconds": ["280", "10"], "properties": ["water, trickles, flow", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "spraying followed by silence"], "question": "which entity is a spray of water?", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "vehicles pass by on a roadway"], "sample_ids": ["ylpYOorfH4o", "tgbONvsP47Y"], "start_seconds": ["410", "0"], "properties": ["engine, running, wind", "pass, vehicle, roadway"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a car is driving 
on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a stream of water runs briefly"], "sample_ids": ["wy1eKjR7KC0", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["people, talk, distance", "stream, water, run"], "captions_pred_video": ["two police officers riding motorcycles down the street", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a person snores loudly multiple times at a close distance"], "sample_ids": ["s4Uz1Ffgo04", "sSMl2vc3ek"], "start_seconds": ["100", "20"], "properties": ["roars, background, people speaking", "loud, multiple, distance"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a person snoring loudly"], "question": "which entity is louder", "label": 0}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "people speak as gunfire rings out"], "sample_ids": ["xKB8O8LTs6s", "wqTCwqVRDlk"], "start_seconds": ["70", "80"], "properties": ["music, radio, gunshots", "gunfire, ring, speak"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking and a gun is fired"], "question": "which entity has more gunshots", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "an infant crying as a woman laughs"], "sample_ids": ["uPDn2BFTHk", "xhmRY9yhC7c"], "start_seconds": ["140", "20"], 
"properties": ["lady, laugh, baby", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity is a video of a baby cooing and fidgeting as a lady speaks and laughs?", "label": 0}, {"captions": ["people clap and speak in the distance", "a clock ticktocks"], "sample_ids": ["wwyfGO2J4", "v-g-j2uTByM"], "start_seconds": ["90", "30"], "properties": ["clap, distance, speak", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["continuous snoring", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["sLkeqCDJIyw", "tDlysoZiA1I"], "start_seconds": ["120", "0"], "properties": ["loud, snoring, noise", "animal, grunts, chirps"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a person is snoring loudly", "birds are chirping and a rooster is crowing "], "question": "which entity is not a noise", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["rwtmaKiCcQU", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["nozzle, depressed, spray can", "a woman, something, fried"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "- a woman cooking in the kitchen"], "captions_pred_audio": ["spraying and people speaking", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "vehicles pass by on a roadway"], "sample_ids": ["uOpoD0gGXcs", "tgbONvsP47Y"], "start_seconds": ["120", "0"], "properties": ["chirps, woman, bird", "pass, vehicle, roadway"], "captions_pred_video": ["a herd of cows grazing in the field", "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "a duck quacks loudly and continuously"], "sample_ids": ["s6DESzUTGjY", "vh30P49Po6s"], "start_seconds": ["16", "30"], "properties": ["wind, laugh, woman", "loud, continuous, quacks"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy 
steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a machine beeps continuously", "wind blows and a vehicle blows a hard then a train blows a horn"], "sample_ids": ["y682ml90jGw", "wnpJndXuxLc"], "start_seconds": ["11", "50"], "properties": ["beeps, machine, continuously", "blows, vehicle, train"], "captions_pred_video": [null, "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a beeping sound is being made ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is not a machine?", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "some tunes played by whistling"], "sample_ids": ["w5W5Kqtc8E", "u6BnG6YZqJ4"], "start_seconds": ["100", "0"], "properties": ["wind, blow, vehicle", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yajyRTUQk3U", "vJ7JPEFhyLA"], "start_seconds": ["400", "16"], "properties": ["noise, woman, speak", "three men, wind, flow"], "captions_pred_video": ["- a woman cooking 
in the kitchen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "three men talk while wind blows and some liquid flows"], "sample_ids": ["uWAAAL4CIoc", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["a woman, chirps, animal", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a woman and man are speaking"], "sample_ids": ["xV7Mg1QucSc", "vbpKkWvfOu4"], "start_seconds": ["14", "560"], "properties": ["alarm, ticktocks, laughs", "two people, speaking, woman, man"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a woman is speaking and a man is speaking"], "question": "which entity has two people speaking?", "label": 1}, {"captions": ["multiple ducks quack continuously", "wind blows as people chatter quietly"], "sample_ids": ["wfHeoPDLMaM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["multiple, quack, continuously", "wind, chatter, people"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the 
middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage is blurry and out of focus"], "captions_pred_audio": ["ducks are quacking", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "people applaud and hoot and chat quietly"], "sample_ids": ["sG7TyPnFDR0", "wwyfGO2J4"], "start_seconds": ["180", "90"], "properties": ["beeps, machine, smoke alarm", "people, applaud, hoot"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", null], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a car speeding up in the distance", "a machine beeps continuously"], "sample_ids": ["u0TrcHhkPQ", "y682ml90jGw"], "start_seconds": ["20", "11"], "properties": ["distance, car, speed", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a beeping sound is being made "], "question": "which entity is not silent", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "music plays and someone speaks before gunfire and an explosion 
occurs"], "sample_ids": ["yDoT73BWsdA", "xKB8O8LTs6s"], "start_seconds": ["10", "70"], "properties": ["engine, revs, vehicle", "music, gunfire, explosion"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wSVhSdj0F0", "sSMl2vc3ek"], "start_seconds": ["10", "20"], "properties": ["beep, clang, footsteps", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["goats bleat and metal clings", "a girl talking, laughing and sneezing noise"], "sample_ids": ["tH17JPjDPnc", "y4tPJXBKDig"], "start_seconds": ["260", "20"], "properties": ["bleat, metal, clings", "a, noise, talk"], "captions_pred_video": ["feed of the goats eating hay in the barn", "footage of the woman wiping her nose with a tissue"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a woman is speaking and coughing with background noise and breathing "], "question": "which entity is talking", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a toilet flushes and a female speaks"], "sample_ids": ["uEU-Hg5MTN8", "yaln9y8I7ms"], "start_seconds": ["27", "230"], "properties": ["a woman, laughs, animal", "female, flushes, toilet"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a baby 
is crying", "a toilet flushes and a man speaks"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "water flows as men speak and yell"], "sample_ids": ["v5P-ThUCINM", "vJ7JPEFhyLA"], "start_seconds": ["400", "16"], "properties": ["background, chirp, bird", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a more natural background", "label": 0}, {"captions": ["a fly buzzes around loudly as birds chirp", "a man speaks followed by another man speaking outside"], "sample_ids": ["uJV8NDaHqqk", "viuTg1M-dqg"], "start_seconds": ["100", "30"], "properties": ["loud, fly, chirp", "two men, speak, follow"], "captions_pred_video": ["a bee hive in a wooden box", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a human", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a train horn blows as it passes by"], "sample_ids": ["vf9xf3vMsGM", "zVacuqSb4LI"], "start_seconds": ["540", "30"], "properties": ["A man speaks while turning a water faucet on.", "horn, blows, train"], "captions_pred_video": ["of the person washing their hands under the faucet", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video 
by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "a stream of water runs briefly"], "sample_ids": ["zCrAfDfv6-A", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["person, mouse, click", "stream, water, run"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a person whistles a song", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["water runs into a sink while men speak", "a car accelerates and wind blows"], "sample_ids": ["vzceMbklWc", "u0TrcHhkPQ"], "start_seconds": ["180", "20"], "properties": ["water, sink, run", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "small dogs yip and bark sharply"], "sample_ids": ["yZmhM1HcsyE", "v-wcQf4BDY0"], "start_seconds": ["4", "120"], "properties": ["engine, roar, water", "bark, yip, sharply"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", 
"a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["electronic beeps occur in a short series", "females talk and laugh over gusting wind"], "sample_ids": ["y682ml90jGw", "un9VQlzgZM"], "start_seconds": ["11", "5"], "properties": ["beeps, series, electronic", "females, talk, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is more natural", "label": 1}, {"captions": ["a man speaks as a machine runs", "small dogs yip and bark sharply"], "sample_ids": ["vD6lYD1l0BY", "v-wcQf4BDY0"], "start_seconds": ["330", "120"], "properties": ["a, machine, run", "bark, yip, sharply"], "captions_pred_video": ["game controller being held in the hands of the person", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "a stream of water runs briefly"], "sample_ids": ["tZGN5a7ybxo", "x-PeY8Yb8M4"], "start_seconds": ["60", "300"], "properties": ["ring, train, horn", "stream, water, run"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a car is driving on a wet road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["uEU-Hg5MTN8", "uYT5gxnyMWM"], "start_seconds": ["27", "50"], "properties": ["animal, grunts, snorts", "female, spraying, scream"], "captions_pred_video": ["of a girl wearing a pig mask and a boy 
hugging her", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a baby is crying"], "question": "which entity has a female speaking?", "label": 0}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["uPDn2BFTHk", "zFjIWfSD-4"], "start_seconds": ["140", "410"], "properties": ["lady, laugh, baby", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "paper folding and crinkling"], "sample_ids": ["uOpoD0gGXcs", "zPpG3RD8lSs"], "start_seconds": ["120", "20"], "properties": ["chirps, woman, bird", "paper, fold, crinkle"], "captions_pred_video": ["a herd of cows grazing in the field", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["birds are chirping and a man is speaking", "the wind blows and a mouse clicks "], "question": 
"which entity is not a living thing", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["t8CV69hcvF0", "zFjIWfSD-4"], "start_seconds": ["210", "410"], "properties": ["person, sneeze, follow", "People, motor, brakes"], "captions_pred_video": ["of an airplane flying in the dark sky at night", null], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a person speaking?", "label": 0}, {"captions": ["water rushes by", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["x-PeY8Yb8M4", "wqZ135Ssz0"], "start_seconds": ["300", "60"], "properties": ["water, rushes, by", "two men, woman, birds"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", null], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a man laughs and speaks as cats purr and hiss"], "sample_ids": ["ziUT9IFTkjg", "vVhthZ45k3Y"], "start_seconds": ["10", "30"], "properties": ["background, birds, rustling", "cat, purr, hiss"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking and a cat is meowing"], "question": "which entity is more animal", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a horn rings out as a machine runs by"], "sample_ids": ["yajyRTUQk3U", "slZLHwNbbt4"], "start_seconds": ["400", "300"], "properties": ["a woman, something, fried", "a, horn, run"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage of a train coming down the 
tracks on a sunny day"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine?", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "a vehicle accelerates before a race car idles then accelerates quickly"], "sample_ids": ["vf44CgrjT0A", "sjlVMgdGSK0"], "start_seconds": ["20", "30"], "properties": ["loud, long, person", "accelerates, vehicle, race car"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a loud burp", "a car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "an airplane engine runs"], "sample_ids": ["wudZTNBtVqc", "yVPZ2MNWpms"], "start_seconds": ["60", "0"], "properties": ["accelerates, engine, wind", "engine, airplane, runs"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a car is driving by on the road "], "question": "which entity has a moving engine", "label": 1}, {"captions": ["birds vocalize and a man speaks", "water flows and trickles"], "sample_ids": ["v0wPrLBI3hg", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["vocalize, bird, speak", "water, flow, trickle"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an engine runs and a man speaks", "a vehicle engine revs as the vehicle 
passes"], "sample_ids": ["yT5WfYMRr-U", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["engine, run, man", "engine, revs, vehicle"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is about a vehicle engine?", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "someone is typing on a computer keyboard"], "sample_ids": ["vlS6YMeWAPo", "v0x1odnXtP0"], "start_seconds": ["40", "210"], "properties": ["sheep, baa, birds", "keyboard, type, computer"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "how to make money on youtube in spanish"], "captions_pred_audio": ["a goat bleats and birds chirp", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a man speaks as a car is passing by"], "sample_ids": ["zofjfKhqLk8", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["background, metal, clank", "a, car, pass"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking with background noise and breathing sounds "], "question": "which 
entity is about a car passing by?", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "someone sprays a liquid onto a hard surface making a hiss sound"], "sample_ids": ["s3cTDAj31g", "zO-LSSY92ZM"], "start_seconds": ["80", "30"], "properties": ["man, talk, woman", "liquid, surface, sound"], "captions_pred_video": [null, "youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'"], "captions_pred_audio": ["a man is speaking and a baby is crying", "steam is hissing and hissing"], "question": "which entity is silent", "label": 1}, {"captions": ["a door opens and birds chirp", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["yeFvk9x0wWI", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["door, open, birds", "gun, shoot, water"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause harm", "label": 1}, {"captions": ["a man speaks while a 
rooster crows and other people speak in the background", "a woman speaks happily and an animal chirps"], "sample_ids": ["wz7N8YRy74I", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["rooster, crow, background, people", "a woman, chirps, animal"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", null], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a woman is speaking and a dog is barking "], "question": "which entity has a rooster?", "label": 0}, {"captions": ["a person sniffles and then sneezes in the distance", "a child speaks in closed space"], "sample_ids": ["uRlbY6aoBU", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["a, distance, sneeze", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is sneezing ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "some liquid flows while a woman laughs and man talks"], "sample_ids": ["zkKdxzNC97Y", "vddP56-ogds"], "start_seconds": ["27", "30"], "properties": ["loud, bang, noise", "liquid, laughs, man"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "water is running and gurgling and a man is speaking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["zY3icUyMdh8", "xfaoyyzw2WU"], "start_seconds": ["20", "180"], "properties": ["dog, bark, engine", "loud, jet engine, roar"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of an 
airplane on the tarmac at an airport"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "a train horn blows as it passes by"], "sample_ids": ["zF8yoL0rkbI", "zVacuqSb4LI"], "start_seconds": ["30", "30"], "properties": ["engine, run, someone", "horn, blows, train"], "captions_pred_video": ["footage of the traffic on the street at night", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["multiple ducks quack continuously", "a motor noise is accompanied by a door opening and closing"], "sample_ids": ["wfHeoPDLMaM", "vBHyYJ8pL0"], "start_seconds": ["30", "2"], "properties": ["multiple, quack, continuously", "noise, door, opening"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn 
with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is accompanied by a door opening and closing?", "label": 1}, {"captions": ["people speak as gunfire rings out", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["wqTCwqVRDlk", "tDlysoZiA1I"], "start_seconds": ["80", "0"], "properties": ["gunfire, ring, speak", "animal, grunts, chirps"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a man is speaking and a gun is fired", "birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a boat travels through the waves as the wind blows loudly and a man speaks over a radio"], "sample_ids": ["vimzuGQvdcU", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["a, man, yells", "wind, radio, waves"], "captions_pred_video": ["a group of people are rafting down a river", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a boat 
traveling through the waves?", "label": 1}, {"captions": ["footsteps followed by a flushing toilet", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yXrw3GRMZag", "vbZ-0lGPneg"], "start_seconds": ["40", "30"], "properties": ["sound, toilet, flush", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a toilet bowl with water in it", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["rustling followed by a toilet flushing", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a man speaking with light rustling"], "sample_ids": ["sa6TLVbooCc", "zOZleIRqZm4"], "start_seconds": ["240", "80"], "properties": ["people, laugh, child", "light, rustling, man"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a man is speaking with crickets chirping in the background"], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a person is snoring while sleeping", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vJrjSeP17yE", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["a person is sleeping, snoring, person", "stream, water, flow"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage is blurry and out of focus"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["water pouring and bubbling", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["uyRfq-jKPpo", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["water, bubbles, pouring", 
"music, gunfire, explosion"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["water is running from a faucet", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a woman speaks happily and an animal chirps"], "sample_ids": ["yajyRTUQk3U", "uWAAAL4CIoc"], "start_seconds": ["400", "0"], "properties": ["noise, woman, speak", "a woman, chirps, animal"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "people applaud and hoot and chat quietly"], "sample_ids": ["xzKKf9bKNUo", "wwyfGO2J4"], "start_seconds": ["10", "90"], "properties": ["background, noise, snoring", "people, applaud, hoot"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", null], "captions_pred_audio": ["a person snoring loudly", "people are clapping and speaking with background 
noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "vehicles pass by on a roadway"], "sample_ids": ["wRBHTgrbiwg", "tgbONvsP47Y"], "start_seconds": ["50", "0"], "properties": ["bird, owl, speak", "pass, vehicle, roadway"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a car is driving on the road "], "question": "which entity is more likely to be seen in a city", "label": 1}, {"captions": ["water rushes by", "paper is crumpling consistently"], "sample_ids": ["x-PeY8Yb8M4", "v5cSxLaHADY"], "start_seconds": ["300", "0"], "properties": ["water, rushes, by", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a car is driving on a wet road ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "a frog croaks as other frogs croak in the background"], "sample_ids": ["uOpoD0gGXcs", "yswmmRZFItk"], "start_seconds": ["120", "0"], "properties": ["chirps, woman, bird", "background, frog, croak"], "captions_pred_video": ["a herd of cows grazing in the field", "a close up of a frog in the water"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a frog is croaking"], "question": "which entity is a frog?", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "a woman speaks and then a man speaks"], "sample_ids": ["sYITalLZjj4", "vbpKkWvfOu4"], "start_seconds": ["30", "560"], "properties": ["water, rushes, background, birds", "a, man, speaks"], "captions_pred_video": ["two ducks are swimming in the water near each other", 
"2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["wind blows and birds chirp", "a woman is speaking and a man is speaking"], "question": "which entity is a video of a woman speaking and then a man speaking?", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "wind blows and a vehicle blows a hard then a train blows a horn"], "sample_ids": ["siJFXfGWgDk", "wnpJndXuxLc"], "start_seconds": ["50", "50"], "properties": ["a, bird, vehicle", "blows, vehicle, train"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity has a train blowing a horn?", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["y8WEcpOlT3I", "vlS6YMeWAPo"], "start_seconds": ["40", "40"], "properties": ["harsh, wind, blows", "sheep, baa, birds"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person is snoring while sleeping", "a man speaks as a motor runs in the background"], "sample_ids": ["vJrjSeP17yE", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["a person is sleeping, snoring, person", "background, motor, run"], "captions_pred_video": ["a black background with a small 
plane flying in the sky", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a person snoring loudly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 0}, {"captions": ["water splashes and wind noise is made into a microphone", "a child speaks in closed space"], "sample_ids": ["sDSppXIlJrs", "yW6FWLSLkx4"], "start_seconds": ["27", "40"], "properties": ["microphone, water, wind", "child, space, speak"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "someone whistles a tune"], "sample_ids": ["yYEVLuqEytU", "sIXTftIuUgw"], "start_seconds": ["40", "90"], "properties": ["animal, pig, background", "someone, tune, whistle"], "captions_pred_video": ["a baby goat is being petted by a person's hand", null], "captions_pred_audio": ["several sheep bleat and a man speaks", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaks as a machine runs", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vD6lYD1l0BY", "zl9Dqx-j7q4"], "start_seconds": ["330", "6"], "properties": ["a, machine, run", "engine, laugh, loud"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "a car speeding up in the 
distance"], "sample_ids": ["y2ZBGpgbhHM", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["dog, chirp, breathe", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds chirping and a dog panting", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "someone is typing on a computer keyboard"], "sample_ids": ["tjmoSi330GM", "v0x1odnXtP0"], "start_seconds": ["23", "210"], "properties": ["speed, water, boat", "keyboard, type, computer"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "how to make money on youtube in spanish"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a person is typing on a keyboard"], "question": "which object is moving", "label": 0}, {"captions": ["a horn honks and then loudly blares", "a telephone rings followed by a woman talking"], "sample_ids": ["wnpJndXuxLc", "tGcFnX0GHI"], "start_seconds": ["50", "0"], "properties": ["horn, honk, loud", "ring, talk, woman"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is quieter", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "paper is crumpling consistently"], "sample_ids": ["ugHJF0hfYkg", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["loud, intense, propeller", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a helicopter is flying overhead ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": 
["two frogs croak at each other", "water splashes as an animal walks through"], "sample_ids": ["zg0X6BnhOLQ", "w1ir-sZ3Im8"], "start_seconds": ["410", "90"], "properties": ["two frogs, croak, at each other", "animal, water, splashes"], "captions_pred_video": ["footage of lightning in the sky at night", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a frog is croaking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "people cheer as a vehicle engine revs"], "sample_ids": ["vBHyYJ8pL0", "xjhAnI2q6hM"], "start_seconds": ["2", "6"], "properties": ["noise, door, opening", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sd7xVssqlw", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["accelerates, tires, squealing", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a car accelerates and revs its engine ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "an infant crying as a woman laughs"], "sample_ids": ["yDoT73BWsdA", "xhmRY9yhC7c"], "start_seconds": ["10", "20"], "properties": ["engine, revs, vehicle", "a, laugh, infant"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "of a baby crying 
in a baby bouncer"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vf44CgrjT0A", "tDVADusiIoc"], "start_seconds": ["20", "60"], "properties": ["loud, long, person", "water, radio, man"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a loud burp", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "a toilet flushes and water drains"], "sample_ids": ["s59PfAghdkM", "sfAvvZwdLCY"], "start_seconds": ["0", "20"], "properties": ["bird, chirp, background, horse, neigh", "water drains, flushes, water"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "footage of the toilet in the bathroom"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a small engine spits as it runs", "a whistling owl calls out repeatedly and insects screech"], "sample_ids": ["sZvwOuuPGP0", "w6RTHR6AeAg"], "start_seconds": ["50", "40"], "properties": ["spits, engine, runs", "call, owl, screech"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", null], "captions_pred_audio": ["a medium engine is running ", "an owl hoots and mechanisms operate "], "question": "which entity is a bird?", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a 
gun shoots, followed by water sloshing nearby"], "sample_ids": ["vSeGhaZt-aI", "wDVMhEdTiVw"], "start_seconds": ["50", "30"], "properties": ["water, sink, talk", "gun, shoot, water"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a gun?", "label": 1}, {"captions": ["some clanking with distant murmuring", "a person snores loudly multiple times at a close distance"], "sample_ids": ["uMTTDZ2mb4", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["clanking, murmuring, distant", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["vehicles pass by on a roadway", "some men converse over an engine running"], "sample_ids": ["tgbONvsP47Y", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["pass, vehicle, roadway", "men, converse, engine"], "captions_pred_video": ["footage of a fire truck entering a garage", null], "captions_pred_audio": ["a car is driving on the road ", "a man speaks while a motorcycle revs and accelerates "], "question": "which is not a vehicle", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a dog whimpers as someone inhales/exhales briefly"], "sample_ids": ["sEprKHm8Sj8", "vmrxwuAMb2I"], "start_seconds": ["90", "40"], "properties": ["car, tires, slows", "a dog, inhales, exhales"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 
2054 2055 2056 2057 2058 2059 2060 2", "of the dog laying on the bed with his head out of the blanket"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a dog barks and growls"], "question": "which entity is a living thing", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["x5cuQjOdM3E", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["cat, meows, young woman", "airplane, boy, fly"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a clock ticktocks continuously", "people cheer as a vehicle engine revs"], "sample_ids": ["vlJS7LN2XyM", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["ticktocks, clock, ticktocks continuously", "engine revs, vehicle, people"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a ticktock of a clock", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "waves crash against a shoreline and people speak"], "sample_ids": ["vz8868znkVQ", "yFB25fqfU8I"], "start_seconds": ["60", "300"], "properties": ["audio, click, kid speaking", "wave, crash, shoreline"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a baby is laughing and breathing 
with background noise ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a video", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a man speaks then blows a vehicle horn as wind blows"], "sample_ids": ["zofjfKhqLk8", "zALy31PjDl0"], "start_seconds": ["10", "21"], "properties": ["noise, stop, motor", "a man, a vehicle, a horn"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a motorcycle is parked on the side of a brick walkway"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and a car horn is honking"], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["an engine starts and increases in power", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zjTG0gaGCUI", "wqZ135Ssz0"], "start_seconds": ["80", "60"], "properties": ["power, increase, engine", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a jet engine roars as wind blows ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a human activity", "label": 1}, {"captions": ["a woman speaks with water running", "wind blowing followed by a zoom"], "sample_ids": ["wTideSjRFS0", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["water, running, woman", "wind, blow, zoom"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "a car speeding up in the distance"], "sample_ids": ["sapQIQUhFc", "u0TrcHhkPQ"], "start_seconds": ["280", "20"], "properties": ["water, stream, 
trickles", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "paper folding and crinkling"], "sample_ids": ["tw76HGONaKg", "zPpG3RD8lSs"], "start_seconds": ["570", "20"], "properties": ["music, click, man", "paper, fold, crinkle"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card 
out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "the wind blows and a mouse clicks "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp as a train approaches", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xM4joTqDVp4", "zl9Dqx-j7q4"], "start_seconds": ["160", "6"], "properties": ["bird, chirp, train", "engine, laugh, loud"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["water flows as men speak and yell", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vJ7JPEFhyLA", "vJ7JPEFhyLA"], "start_seconds": ["16", "16"], "properties": ["water, flow, men", "three men, wind, flow"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows men speaking and yelling?", "label": 0}, {"captions": ["loud, continuous burping", "an insect buzzes around continuously"], "sample_ids": ["y636gklDioE", "v25l1jef3JY"], "start_seconds": ["20", "0"], "properties": ["loud, continuous, burping", "buzzes, continuously, insect"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a person burps 
loudly several times", "a fly is buzzing around a microphone "], "question": "which entity is not a human", "label": 1}, {"captions": ["someone snores nearby", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["spJCm8tD9Zo", "uEU-Hg5MTN8"], "start_seconds": ["90", "27"], "properties": ["someone snores, nearby, someone", "animal, grunts, snorts"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a vehicle accelerates and squeals tires"], "sample_ids": ["xyL9F5VrjkE", "yRx9txMcBl0"], "start_seconds": ["20", "40"], "properties": ["wind, motor, distance", "accelerates, tires, squeals"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a car is revving its engine and skidding "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a car accelerates and wind blows"], "sample_ids": ["xKB8O8LTs6s", "u0TrcHhkPQ"], "start_seconds": ["70", "20"], "properties": ["music, radio, gunshots", 
"accelerates, wind, blows"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["water flows as men speak and yell", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vJ7JPEFhyLA", "wqZ135Ssz0"], "start_seconds": ["16", "60"], "properties": ["water, flow, men", "two men, woman, birds"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "people applaud and hoot and chat quietly"], "sample_ids": ["zcDwZ6W7E3E", "wwyfGO2J4"], "start_seconds": ["180", "90"], "properties": ["a, man, speak", "people, applaud, hoot"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "water pouring and bubbling"], "sample_ids": ["sfAvvZwdLCY", "uyRfq-jKPpo"], "start_seconds": ["20", "50"], "properties": ["flushes, drains, water", "water, bubbles, pouring"], "captions_pred_video": ["footage of the toilet in the bathroom", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a toilet is flushed", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "water flows and trickles"], "sample_ids": ["vveS8HT7Uog", "tB7hWb9gTuQ"], "start_seconds": ["100", "30"], "properties": ["a man, objects, speak", "water, flow, trickle"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["someone snores nearby", "a person snores loudly multiple times at a close distance"], "sample_ids": ["spJCm8tD9Zo", "sSMl2vc3ek"], "start_seconds": ["90", "20"], "properties": ["someone snores, nearby, someone", "loud, multiple, distance"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a person snoring loudly"], "question": "which entity is more annoying", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a child speaks in closed space"], "sample_ids": ["zFjIWfSD-4", "yW6FWLSLkx4"], "start_seconds": ["410", "40"], "properties": ["People, motor, brakes", "child, space, speak"], "captions_pred_video": [null, "of 
the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a dog barks and whimpers", "water is sprayed across a hard surface"], "sample_ids": ["sShpyu2l4YQ", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["barks, whimpers, dog", "water, spray, surface"], "captions_pred_video": ["the puppies are playing with a toy", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a dog is barking and growling", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["wy1eKjR7KC0", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["people, talk, distance", "sheep, baa, birds"], "captions_pred_video": ["two police officers riding motorcycles down the street", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man talks as something metal hits against and glass is set down", "a infant makes noise and is excited"], "sample_ids": ["x6ijhqRY38s", "wIJK3-5y0kA"], "start_seconds": ["250", "30"], "properties": ["something metal, glass, hit", "noise, excited, infant"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a male is 
speaking and a duck quacks as others laugh", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tiDFTC-5vU", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["male, duck, laugh", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["someone is snoring while sleeping", "three men talk while wind blows and some liquid flows"], "sample_ids": ["ujMt0-D-x2k", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["snore, sleep, someone", "three men, wind, flow"], "captions_pred_video": ["of the dog playing with a toy on the floor", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a person", "label": 0}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a machine beeps continuously"], "sample_ids": ["uzQnlJXBbOM", "y682ml90jGw"], "start_seconds": ["50", "11"], "properties": ["ringing, beep, stop", "beeps, machine, continuously"], "captions_pred_video": ["footage of a person using a cell phone on a table", null], "captions_pred_audio": ["a telephone rings and a man speaks", "a beeping sound is being made "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "paper is crumpling consistently"], "sample_ids": ["yLy-WycbVVE", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["background, people, talk", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "footage of the person 
holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "paper is crumpled and crinkled"], "question": "which entity is more silent", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "an infant crying as a woman laughs"], "sample_ids": ["yFB25fqfU8I", "xhmRY9yhC7c"], "start_seconds": ["300", "20"], "properties": ["wave, crash, shoreline", "a, laugh, infant"], "captions_pred_video": ["footage of a person surfing in the ocean", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a woman speaks as she rubs two objects together"], "sample_ids": ["zofjfKhqLk8", "vzxHnu-SFEw"], "start_seconds": ["10", "80"], "properties": ["background, metal, clank", "two objects, woman, speak"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a machine?", "label": 0}, {"captions": ["water splashes and a door squeaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sdXV-ylviw", "su6FAOcOA8c"], "start_seconds": ["190", "4"], "properties": ["sound, splash, door", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a clock ticktocks"], "sample_ids": ["tOSWIURC-4", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["engine, work, nearby", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a lawn mower is running ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["a woman speaks with water running", "small dogs yip and bark sharply"], "sample_ids": ["wTideSjRFS0", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["water, running, woman", "bark, yip, sharply"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["dogs barking and whimpering", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tIY7qOV3rEM", "yajyRTUQk3U"], "start_seconds": ["0", 
"400"], "properties": ["barking, whimpering, dog", "a woman, something, fried"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vSeGhaZt-aI", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["water, bubbles, run", "three men, wind, flow"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a man speaking while water bubbles and runs?", "label": 0}, {"captions": ["a man talks followed by a woman shouting", "a toilet flushes and a female speaks"], "sample_ids": ["s3cTDAj31g", "yaln9y8I7ms"], "start_seconds": ["80", "230"], "properties": ["man, talk, woman", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a toilet flushes and a man speaks"], "question": "which entity is about a toilet?", "label": 1}, {"captions": ["wind blows as people chatter quietly", "dishes cling together then a man begins to speak"], "sample_ids": ["xBxDz0CFVn0", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["wind, chatter, people", "cling, speak, dishes"], "captions_pred_video": ["footage is blurry and out of focus", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "mechanisms are operating and water is splashing "], 
"question": "which entity is about a man speaking?", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a man speaks as crickets sing"], "sample_ids": ["sa6TLVbooCc", "ryFDPxgDOGc"], "start_seconds": ["240", "570"], "properties": ["people, laugh, child", "a, crickets, sing"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "a group of people dressed in camouflage and hunting gear in the dark"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a man is speaking with crickets chirping in the background"], "question": "which entity has a child speaking?", "label": 0}, {"captions": ["wind blows strongly", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["w8uLijTqtlU", "tDVADusiIoc"], "start_seconds": ["70", "60"], "properties": ["wind, blows, strongly", "water, radio, man"], "captions_pred_video": ["footage is blurry and shaky", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be in a storm", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "water flows as men speak and yell"], "sample_ids": ["xvDdE3zNf8Y", "vJ7JPEFhyLA"], "start_seconds": ["120", "16"], "properties": ["a, female, speaks", "water, flow, men"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "winds blows roughly as a vehicle races past"], "sample_ids": ["wwyfGO2J4", "xjvTpk2Zpr8"], "start_seconds": ["90", "70"], "properties": ["people, applaud, hoot", 
"wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["dogs barking and whimpering", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tIY7qOV3rEM", "vb1fPSDI4c"], "start_seconds": ["0", "30"], "properties": ["barking, whimpering, dog", "multiple, people, yell"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "paper is crumpling consistently"], "sample_ids": ["sQwlkXjQabo", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["liquid, surface, spray", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["spraying followed by silence", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["sLUnaPT5gM8", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["loud, laughter, intermittent", "a woman, a television program, a bird"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a woman is speaking and a dog is whimpering"], "question": "which entity is more quiet", 
"label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["yYEVLuqEytU", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["animal, pig, background", "men, talk, cars"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "people speak in the background as a clock ticktocks"], "sample_ids": ["tiDFTC-5vU", "vZAw4apG0Es"], "start_seconds": ["30", "30"], "properties": ["male, duck, laugh", "background, clock, ticktocks"], "captions_pred_video": [null, "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a clock is ticking and people are talking"], "question": "which entity has a clock ticktocking in the background?", "label": 1}, {"captions": ["a toilet flushes and water drains", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sfAvvZwdLCY", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["water drains, flushes, water", "music, gunfire, explosion"], 
"captions_pred_video": ["footage of the toilet in the bathroom", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a toilet is flushed", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a scene of a toilet flushing and water draining?", "label": 0}, {"captions": ["a motorcycle idles loudly as wind blows", "wind blows as people chatter quietly"], "sample_ids": ["v7jJS8aAyA", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["wind, blows, loudly", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["humming of idling and revving engine with a man speaking", "dishes cling together then a man begins to speak"], "sample_ids": ["wqADXCzngMw", "sQGXqGcwOTc"], "start_seconds": ["340", "3"], "properties": ["audio, humming, revving", "cling, speak, dishes"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "mechanisms are operating and water is splashing "], "question": "which entity is a video", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wTideSjRFS0", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["food, sizzle, woman", "engine, accelerate, idle"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["white noise and 
snoring with some rustling in the background", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["xzKKf9bKNUo", "xKB8O8LTs6s"], "start_seconds": ["10", "70"], "properties": ["background, noise, snoring", "music, gunfire, explosion"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a person snoring loudly", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "three men talk while wind blows and some liquid flows"], "sample_ids": ["zkKdxzNC97Y", "vJ7JPEFhyLA"], "start_seconds": ["27", "16"], "properties": ["loud, bang, noise", "three men, wind, flow"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a loud noise?", "label": 0}, {"captions": ["a man talks as several small engines run", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["u9A6VZQCZpU", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "people speak as gunfire rings out"], "sample_ids": ["uiItxDsDMFI", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["wood, piece, saw", 
"gunfire, ring, speak"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a saw is being used with background noise ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sQGXqGcwOTc", "wz7N8YRy74I"], "start_seconds": ["3", "30"], "properties": ["audio, kid, giggles", "rooster, crow, background, men"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a person speaks over rustling leaves", "a horse runs while two women talk"], "sample_ids": ["zOZleIRqZm4", "sdvI1mHAsc"], "start_seconds": ["80", "20"], "properties": ["rustling, leaves, person", "two women, horse, run"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "horses clip-clop and a woman speaks"], "question": "which entity has more action", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "some men converse over an engine running"], "sample_ids": ["xOZfdgAgJ9o", "sCiy7QS1U"], "start_seconds": ["40", "300"], "properties": ["woman, whimpering, speaking", "men, converse, engine"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man speaks while a motorcycle revs and accelerates 
"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a person sniffles and sneezes", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["uRlbY6aoBU", "uZesmtKZGSw"], "start_seconds": ["0", "250"], "properties": ["sneezes, sniffles, person", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["an adult woman and an adult man speak", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zTLVJCo4WEE", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["two people, adult, speak", "a woman, something, fried"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a frog croaks as other frogs croak in the background"], "sample_ids": ["weDbePuc-Xc", "yswmmRZFItk"], "start_seconds": ["40", "0"], "properties": ["music, slaps, human", "background, frog, croak"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting 
on the ground next to each other", "a close up of a frog in the water"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a frog is croaking"], "question": "which entity is a croaking animal?", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wsHBIgzs9Fs", "zj2R0XoFr5k"], "start_seconds": ["50", "50"], "properties": ["horn, continuous, buzzing", "airplane, boy, fly"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "several insects fly while two men talk"], "sample_ids": ["zofjfKhqLk8", "s-T9OVOiMLo"], "start_seconds": ["10", "330"], "properties": ["noise, stop, motor", "several, fly, men"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "rain falls on a surface as men speak and thunder roars"], "sample_ids": ["tDVADusiIoc", "w0xsN8X18Y"], "start_seconds": ["60", "30"], "properties": ["man, radio, blows", "rain, thunder, surface"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", 
"a man is speaking while a motorboat is moving in the background "], "question": "which entity is about a man speaking on a radio as wind blows?", "label": 0}, {"captions": ["someone snores nearby", "a man speaks as a car is passing by"], "sample_ids": ["spJCm8tD9Zo", "sK4u5T8hW78"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "a, car, pass"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["wRV8yMk886E", "t25U-v4k4ts"], "start_seconds": ["0", "40"], "properties": ["liquid, spray, nozzle", "a, chirps, bird"], "captions_pred_video": ["two cars are parked in a parking lot at night", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a man is speaking and bees are buzzing"], "question": "which entity has a bird chirp?", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "a man speaks while turning a water faucet on"], "sample_ids": ["yYJksgsxx5U", "vf9xf3vMsGM"], "start_seconds": ["30", "540"], "properties": ["audio, woman, silverware", "A man speaks while turning a water faucet on."], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "of the person washing 
their hands under the faucet"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a man is speaking while water is running in the background"], "question": "which entity is a man?", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "rain falls on a surface as men speak and music plays"], "sample_ids": ["w34HjHr6gAY", "w0xsN8X18Y"], "start_seconds": ["30", "30"], "properties": ["beeps, squawk, child speaking", "music, surface, rain"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking while a motorboat is moving in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a child speaks in closed space"], "sample_ids": ["ugHJF0hfYkg", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["engine, running, continuously", "child, space, speak"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is not running continuously", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a man speaks over a radio as wind blows 
and water splashes"], "sample_ids": ["zofjfKhqLk8", "tDVADusiIoc"], "start_seconds": ["10", "60"], "properties": ["background, metal, clank", "water, radio, man"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "a toilet flushes and water drains unevenly"], "sample_ids": ["smGI3C1NZc", "vhJWZheqaE"], "start_seconds": ["30", "0"], "properties": ["water, drain, toilet", "water drains unevenly, toilet flushes, water drains"], "captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is flushed", "a toilet is flushed"], "question": "which toilet is leaking?", "label": 0}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a telephone rings followed by a woman talking"], "sample_ids": ["su6FAOcOA8c", "tGcFnX0GHI"], "start_seconds": ["4", "0"], "properties": ["engine, run, woman", "ring, talk, woman"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", null], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "people speak as gunfire rings out"], "sample_ids": ["wsHBIgzs9Fs", "wqTCwqVRDlk"], "start_seconds": ["50", "80"], "properties": ["horn, continuous, buzzing", "gunfire, ring, speak"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a car accelerates and revs 
its engine while a man speaks ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a kid speaks followed by music playing", "water pouring and bubbling"], "sample_ids": ["tQWGZLItBXk", "uyRfq-jKPpo"], "start_seconds": ["170", "50"], "properties": ["music, kid, speak", "water, bubbles, pouring"], "captions_pred_video": ["worms revolution screenshots", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "water is running from a faucet"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["material crumbles into a microphone", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vofpvUo6NAw", "zFjIWfSD-4"], "start_seconds": ["220", "410"], "properties": ["material, crumbles, microphone", "People, motor, brakes"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", null], "captions_pred_audio": ["paper is being crumpled and crinkled", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a recording", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a duck 
quacks continuously"], "sample_ids": ["yLy-WycbVVE", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["background, people, talk", "quacks, continuously, duck"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a stream of water runs briefly", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["x-PeY8Yb8M4", "zj2R0XoFr5k"], "start_seconds": ["300", "50"], "properties": ["stream, water, run", "airplane, boy, fly"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a car is driving on a wet road ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a propeller rotates loudly and intensely"], "sample_ids": ["ukxt9I7eMMg", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["continuous, woman, speaking", "loud, intense, propeller"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a machine beeps continuously"], "sample_ids": ["wyllXV6PjKo", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["a baby, a woman, a man", "beeps, machine, continuously"], "captions_pred_video": [null, null], 
"captions_pred_audio": ["a woman speaks and a baby cries", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sfAvvZwdLCY", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["flushes, drains, water", "three men, wind, flow"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing?", "label": 0}, {"captions": ["male speech with light ticking", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["xO-Q2BlIIPU", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["male, speech, ticking", "a woman, laughs, animal"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a joke", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "a child speaks in closed space"], "sample_ids": ["zcDwZ6W7E3E", "yW6FWLSLkx4"], "start_seconds": ["180", "40"], "properties": ["a, man, speak", "child, space, speak"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 0}, {"captions": ["a vehicle engine 
runs as a siren and horn sound", "people cheer as a vehicle engine revs"], "sample_ids": ["u--KhUW8l1Y", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["sound, vehicle, horn", "engine revs, vehicle, people"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a truck is revving its engine and a man is speaking "], "question": "which vehicle is being driven by people?", "label": 1}, {"captions": ["an engine runs loudly", "a man speaks followed by another man speaking outside"], "sample_ids": ["vqZuVbG6-HI", "viuTg1M-dqg"], "start_seconds": ["130", "30"], "properties": ["loud, engine, run", "two men, speak, follow"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which is quieter", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["w2bYrCVLT60", "vfYTJq7nU"], "start_seconds": ["120", "130"], "properties": ["ducks, speak, quack", "rustling, ducks, quack"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", null], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a duck quacks and a woman speaks"], "question": "which entity has more ducks", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "a frog vocalizes while birds chirp"], "sample_ids": ["uKCSGgof8gI", "vMf1dLD6Sng"], "start_seconds": ["12", "6"], "properties": ["chirps, distance, signal", "frog, bird, vocalize"], "captions_pred_video": ["footage 
of a street in a small town on a sunny day", "a frog in a pond with pink flowers in the background"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a frog croaks loudly"], "question": "which entity is a frog?", "label": 1}, {"captions": ["a person snoring several times", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["spJCm8tD9Zo", "zj2R0XoFr5k"], "start_seconds": ["90", "50"], "properties": ["snore, person, several", "airplane, boy, fly"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person is snoring loudly", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a woman speaks happily and an animal chirps"], "sample_ids": ["ugHJF0hfYkg", "uWAAAL4CIoc"], "start_seconds": ["10", "0"], "properties": ["loud, propeller, move", "a woman, chirps, animal"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a dog is barking "], "question": "which is quieter", "label": 1}, {"captions": ["wind blows and a stream of water flows nearby", "a clock ticktocks"], "sample_ids": ["sYITalLZjj4", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["stream, flow, wind", "ticktocks, clock, ticktocks"], "captions_pred_video": ["two ducks are swimming in the water near each other", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["wind blows and birds chirp", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": 
["wnpJndXuxLc", "y2bVZ7rz-5M"], "start_seconds": ["50", "280"], "properties": ["blows, vehicle, train", "motor noise, horn, siren"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a loud engine muffles a man as he speaks", "food is frying then a woman speaks"], "sample_ids": ["xyx6eNVEYRY", "ukxt9I7eMMg"], "start_seconds": ["380", "30"], "properties": ["loud, engine, muffles", "food, woman, speak"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "footage of a person preparing food on a stool in a kitchen"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a woman is speaking while food is frying in the background "], "question": "which entity is a man speaking?", "label": 0}, {"captions": ["dogs barking and whimpering", "a duck quacks continuously"], "sample_ids": ["tIY7qOV3rEM", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["barking, whimpering, dog", "quacks, continuously, duck"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a man speaks as a car is passing by"], "sample_ids": ["zTLVJCo4WEE", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a, crickets, sing", "a, car, pass"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 
1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking with background noise and breathing sounds "], "question": "which entity is in a city", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "paper is crumpling consistently"], "sample_ids": ["vZAw4apG0Es", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["people, clock, converse", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a clock is ticking and people are talking", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["someone snores nearby", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["spJCm8tD9Zo", "uEU-Hg5MTN8"], "start_seconds": ["90", "27"], "properties": ["someone snores, nearby, someone", "a woman, laughs, animal"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a large crowd cheers and applauds", "people cheer as a vehicle engine revs"], "sample_ids": ["rqfQRErjfk8", "xjhAnI2q6hM"], "start_seconds": ["170", "6"], "properties": ["crowd, cheers, applauds", "engine revs, vehicle, people"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "a school bus decorated with christmas lights is floating in 
the water"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a truck is revving its engine and a man is speaking "], "question": "which entity is a crowd?", "label": 0}, {"captions": ["an engine revs and a turning noise is made", "water splashes as an animal walks through"], "sample_ids": ["tOSWIURC-4", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["noise, engine, revs", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a lawn mower is running ", "water splashes and gurgles as people speak"], "question": "which entity is not a noise", "label": 1}, {"captions": ["birds chirp as a bell rings", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["ziUT9IFTkjg", "vfYTJq7nU"], "start_seconds": ["10", "130"], "properties": ["chirp, bell, ring", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a duck quacks and a woman speaks"], "question": "which entity is about birds?", "label": 0}, {"captions": ["a man rubs two objects together then speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["vveS8HT7Uog", "vzxHnu-SFEw"], "start_seconds": ["100", "80"], "properties": ["a man, objects, speak", "two objects, woman, speak"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to 
make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a man speaking?", "label": 0}, {"captions": ["an audience gives applause as a man yells and a group sings", "pigeons vocalize and birds chirp"], "sample_ids": ["tdWhHV3X25Q", "uiS58TNyUiw"], "start_seconds": ["60", "430"], "properties": ["applause, audience, yells", "vocalize, bird, chirp"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["a man is filing a hard object", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vveS8HT7Uog", "su6FAOcOA8c"], "start_seconds": ["100", "4"], "properties": ["a man, hard, object", "engine, idle, woman"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a woman is speaking and a subway train is moving "], "question": "which object is being filed", "label": 0}, {"captions": ["frogs croak and vocalize", "paper folding and crinkling"], "sample_ids": ["yswmmRZFItk", "zPpG3RD8lSs"], 
"start_seconds": ["0", "20"], "properties": ["croak, vocalize, frog", "paper, fold, crinkle"], "captions_pred_video": ["a close up of a frog in the water", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a frog is croaking", "the wind blows and a mouse clicks "], "question": "which entity is not a frog?", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["w2bYrCVLT60", "wwyfGO2J4"], "start_seconds": ["120", "90"], "properties": ["ducks, speak, quack", "people, applaud, hoot"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", null], "captions_pred_audio": ["ducks are quacking and a man is speaking", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a dog barks and whimpers"], "sample_ids": ["sa6TLVbooCc", "sShpyu2l4YQ"], "start_seconds": ["240", "0"], "properties": ["people, laugh, child", "barks, whimpers, dog"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "the puppies are playing 
with a toy"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a dog is barking and growling"], "question": "which entity is more quiet", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["w0xsN8X18Y", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["rain, thunder, surface", "a train, a horn, a bell"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a duck quacks continuously"], "sample_ids": ["wRBHTgrbiwg", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["bird, owl, speak", "quacks, continuously, duck"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a duck is quacking loudly"], "question": "which animal is speaking", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a machine beeps continuously"], "sample_ids": ["ugHJF0hfYkg", "y682ml90jGw"], "start_seconds": ["10", "11"], "properties": ["engine, running, continuously", "beeps, machine, continuously"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a beeping sound is being made "], "question": "which machine beeps continuously", "label": 1}, {"captions": ["wind blows as people chatter quietly", "wind blows as people chatter quietly"], "sample_ids": ["xBxDz0CFVn0", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["wind, chatter, people", "wind, chatter, 
people"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is a photograph", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "vehicles pass by on a roadway"], "sample_ids": ["ylpYOorfH4o", "tgbONvsP47Y"], "start_seconds": ["410", "0"], "properties": ["engine, run, loud", "pass, vehicle, roadway"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a car is driving on the road "], "question": "which vehicle is moving", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a woman speaks happily and an animal chirps"], "sample_ids": ["w5W5Kqtc8E", "uWAAAL4CIoc"], "start_seconds": ["100", "0"], "properties": ["wind, engine, scream", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking and a dog is barking "], "question": "which entity is more calm", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "people cheer as a vehicle 
engine revs"], "sample_ids": ["ylpYOorfH4o", "xjhAnI2q6hM"], "start_seconds": ["410", "6"], "properties": ["motor, run, steady", "engine revs, vehicle, people"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a truck is revving its engine and a man is speaking "], "question": "which motor runs steadily as a man speaks", "label": 0}, {"captions": ["a horn blasts loudly as a train passes", "a infant makes noise and is excited"], "sample_ids": ["zsLxS-uLJTw", "wIJK3-5y0kA"], "start_seconds": ["20", "30"], "properties": ["horn, blast, train", "noise, excited, infant"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a baby cries and a woman speaks"], "question": "which is louder", "label": 0}, {"captions": ["a goat screams and people speak in the background", "a door slams shut roughly"], "sample_ids": ["xC8kbrKJmco", "zkKdxzNC97Y"], "start_seconds": ["0", "27"], "properties": ["background, goat, scream", "a door, slams, shut"], "captions_pred_video": [null, "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a goat is bleating ", "a door is opened 
and closed"], "question": "which entity is quieter", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a toilet flushes and water drains"], "sample_ids": ["y2bVZ7rz-5M", "sfAvvZwdLCY"], "start_seconds": ["280", "20"], "properties": ["motor noise, horn, siren", "water drains, flushes, water"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "paper is crumpling consistently"], "sample_ids": ["vms5XGTDVQc", "v5cSxLaHADY"], "start_seconds": ["220", "0"], "properties": ["paper, crumpled, crinkled", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["paper is crumpled and crinkled", "paper is crumpled and crinkled"], "question": "which paper is crumpling consistently", "label": 1}, {"captions": ["a motorcycle engine works nearby", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tOSWIURC-4", "vfYTJq7nU"], "start_seconds": ["0", "130"], "properties": ["engine, work, nearby", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a lawn mower is running ", "a duck quacks and a woman speaks"], "question": "which entity is a natural event", "label": 1}, {"captions": ["vehicles pass by on a roadway", "pigeons vocalize and birds chirp"], "sample_ids": ["tgbONvsP47Y", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["pass, vehicle, roadway", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a fire truck entering a garage", 
"of the pigeon in the cage"], "captions_pred_audio": ["a car is driving on the road ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["uWPRNLnpy7Y", "tdWhHV3X25Q"], "start_seconds": ["10", "60"], "properties": ["accelerate, laugh, vehicle", "applause, audience, yells"], "captions_pred_video": ["is taken from a car driving down the street", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["wqZ135Ssz0", "uEU-Hg5MTN8"], "start_seconds": ["60", "27"], "properties": ["man, woman, squawks", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a more snorts", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["v5P-ThUCINM", "vfYTJq7nU"], "start_seconds": ["400", "130"], "properties": ["background, chirp, bird", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and birds are chirping", "a duck quacks and a woman speaks"], "question": "which entity has more birds", "label": 1}, {"captions": ["a door opens and birds chirp", "wind blows as people chatter quietly"], "sample_ids": 
["yeFvk9x0wWI", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["door, open, birds", "wind, chatter, people"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage is blurry and out of focus"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "men speak and a nozzle sprays liquid"], "sample_ids": ["u21-Z5gJCB8", "wRV8yMk886E"], "start_seconds": ["30", "0"], "properties": ["background, voice, man", "liquid, spray, nozzle"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "two cars are parked in a parking lot at night"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man speaks followed by a loud burst"], "question": "which entity is more active", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "water splashes as an animal walks through"], "sample_ids": ["wwyfGO2J4", "w1ir-sZ3Im8"], "start_seconds": ["90", "90"], "properties": ["people, applaud, hoot", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["u21-Z5gJCB8", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["background, voice, man", "male, duck, laugh"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and ducks 
are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a child babbles as a woman speaks"], "sample_ids": ["vJvryTwuAV8", "wEBlkGWVWwE"], "start_seconds": ["16", "260"], "properties": ["audience, cheer, man", "a, babble, woman"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "shows a person writing on the whiteboard"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a woman is speaking and a child is speaking with background noise and clapping "], "question": "which entity is a child?", "label": 1}, {"captions": ["food is frying and sizzles", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zNRChLjqcU", "yajyRTUQk3U"], "start_seconds": ["220", "400"], "properties": ["food is frying, sizzles, food", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["water is running from a faucet into a sink", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking while something is fried?", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["sZPuqDgX2V0", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["engine, accelerate, intercom", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["a beep occurs briefly", "several beeps are followed by a hit and a woman talking"], "sample_ids": 
["xtWeJ56-U-g", "w34HjHr6gAY"], "start_seconds": ["20", "30"], "properties": ["beep, occur, briefly", "beeps, hit, woman"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a infant makes noise and is excited"], "sample_ids": ["su6FAOcOA8c", "wIJK3-5y0kA"], "start_seconds": ["4", "30"], "properties": ["engine, run, woman", "noise, excited, infant"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["u--KhUW8l1Y", "sLUnaPT5gM8"], "start_seconds": ["0", "0"], "properties": ["horn, siren, life", "loud, laughter, intermittent"], "captions_pred_video": ["a firefighter spraying water from a fire 
hydrant at night", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a goat screams and people speak in the background", "people applaud and hoot and chat quietly"], "sample_ids": ["xC8kbrKJmco", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["background, goat, scream", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a goat is bleating ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a vehicle engine runs and someone speaks"], "sample_ids": ["yLy-WycbVVE", "zF8yoL0rkbI"], "start_seconds": ["30", "30"], "properties": ["background, people, talk", "engine, run, someone"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "footage of the traffic on the street at night"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "the wind is blowing hard and water is splashing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a toilet flushes and water drains", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sfAvvZwdLCY", "vYkA3cfXp5Q"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "engine, accelerate, idle"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a toilet is flushed", "an engine is idling"], "question": "which entity is a machine", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "a man speaks as a car is passing 
by"], "sample_ids": ["slZLHwNbbt4", "sK4u5T8hW78"], "start_seconds": ["300", "30"], "properties": ["clap, distance, horn", "a, car, pass"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a propeller moves loudly nearby", "pigeons vocalize and birds chirp"], "sample_ids": ["ugHJF0hfYkg", "uiS58TNyUiw"], "start_seconds": ["10", "430"], "properties": ["loud, propeller, move", "vocalize, bird, chirp"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of the pigeon in the cage"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking and a bee is buzzing"], "question": "which is quieter", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "propeller rearing loudly with some male and female voices interspersed in the background"], "sample_ids": ["x9JovgqUcs", "vqZuVbG6-HI"], "start_seconds": ["500", "130"], "properties": ["a, man, speaks, keyboard", "background, male, female"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a lawn mower is running and men are speaking "], "question": "which entity has a man speaking and typing on a keyboard?", "label": 0}, {"captions": ["a clock ticktocks continuously", "a child 
speaks in closed space"], "sample_ids": ["vlJS7LN2XyM", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["ticktocks, clock, ticktocks continuously", "child, space, speak"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a ticktock of a clock", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "water flows and trickles"], "sample_ids": ["zuua6-5goWw", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["birds, chirp, quiet, man, speaks", "water, flow, trickle"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a person snoring several times", "a pigeon cooing as an insect buzzes by briefly"], "sample_ids": ["spJCm8tD9Zo", "yZrFNS7GFBQ"], "start_seconds": ["90", "30"], "properties": ["snore, person, several", "pigeon, buzzes, insect"], 
"captions_pred_video": ["of a man laying on the ground with his mouth open", "of the bird in the cage"], "captions_pred_audio": ["a person is snoring loudly", "an owl hoots in the background "], "question": "which entity is not a person?", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["s4Uz1Ffgo04", "sapQIQUhFc"], "start_seconds": ["100", "280"], "properties": ["water, rushes, motorcycle", "liquid, flow, distance"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking and a stream is flowing in the background "], "question": "which entity is more distant", "label": 1}, {"captions": ["tapping occurs then a baby cries", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["wIJK3-5y0kA", "x9JovgqUcs"], "start_seconds": ["30", "500"], "properties": ["a, cry, baby", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man speaks and types on a keyboard"], "question": "which entity is typing?", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["uWAAAL4CIoc", "xfaoyyzw2WU"], "start_seconds": ["0", "180"], "properties": ["a woman, chirps, animal", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["an airplane engine spools and people speak", "a stream of water flows as people talk and 
wind blows"], "sample_ids": ["wTjoRj1se3U", "xBxDz0CFVn0"], "start_seconds": ["390", "30"], "properties": ["airplane, engine, spool", "stream, water, flow"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage is blurry and out of focus"], "captions_pred_audio": ["a jet engine is running and people are talking", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["birds tweet and squawk", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["w1mlz3Pe4fU", "tDlysoZiA1I"], "start_seconds": ["300", "0"], "properties": ["squawk, tweet, scream", "animal, grunts, chirps"], "captions_pred_video": ["of a bird in a cage", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["birds are chirping and singing", "birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sNB8zxXneIM", "yajyRTUQk3U"], "start_seconds": ["20", "400"], "properties": ["several, quack, cocks", "a woman, something, fried"], "captions_pred_video": ["a group of geese in a cage", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["vehicles pass by on a roadway", "a man speaks on a radio as wind blows"], "sample_ids": ["tgbONvsP47Y", "tDVADusiIoc"], "start_seconds": ["0", "60"], "properties": ["pass, vehicle, roadway", "man, radio, blows"], "captions_pred_video": ["footage of a fire truck entering a garage", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a car is driving on the road ", "a man is speaking while the wind is blowing and 
water is splashing"], "question": "which entity is stationary", "label": 1}, {"captions": ["a man speaks as horns blow", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tHyNqRyK34A", "uYT5gxnyMWM"], "start_seconds": ["24", "50"], "properties": ["a, man, speaks", "female, spraying, scream"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is a woman?", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "people speak and tapping occurs"], "sample_ids": ["vXlk0lIQBFo", "tFCUUGdREgA"], "start_seconds": ["470", "70"], "properties": ["wind, talk, vocalize", "people, tap, speak"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a person riding a white horse in an indoor arena"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a man is speaking and walking with wind noise in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "someone whistles a tune"], "sample_ids": ["vVhthZ45k3Y", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["cat, purr, hiss", "someone, tune, whistle"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a person whistling a song"], "question": "which entity is a human activity", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "a car accelerates and wind blows"], "sample_ids": ["zliInBdC98Y", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["a, baby, cries, wails", "accelerates, wind, blows"], "captions_pred_video": ["of a cute little girl playing 
with her hair in the bathroom", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["an engine runs loudly", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vqZuVbG6-HI", "su6FAOcOA8c"], "start_seconds": ["130", "4"], "properties": ["loud, engine, run", "engine, idle, woman"], "captions_pred_video": ["footage is blurry because it's raining outside", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a woman is speaking and a subway train is moving "], "question": "which entity is quieter", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wfHeoPDLMaM", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["quacking, squawking, ducks", "rooster, crow, background, men"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": 
["ducks are quacking", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a bird", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a clock ticktocks"], "sample_ids": ["yYEVLuqEytU", "v-g-j2uTByM"], "start_seconds": ["40", "30"], "properties": ["animal, pig, background", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "waves crash against a shoreline and people speak"], "sample_ids": ["s6DESzUTGjY", "yFB25fqfU8I"], "start_seconds": ["16", "300"], "properties": ["wind, laugh, woman", "wave, crash, shoreline"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a man is speaking and laughing while water is splashing and gurgling"], "question": 
"which entity is more calm", "label": 0}, {"captions": ["a man talks as several small engines run", "a rumbling clap in the distance followed by a horn and the rumbling grows louder"], "sample_ids": ["u9A6VZQCZpU", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["a, man, talk", "clap, distance, horn"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is not a clap?", "label": 0}, {"captions": ["a woman speaks and then a man speaks", "a piece of wood is being placed down and sawed"], "sample_ids": ["vbpKkWvfOu4", "uiItxDsDMFI"], "start_seconds": ["560", "30"], "properties": ["a, man, speaks", "wood, piece, saw"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a saw is being used with background noise "], "question": "which entity is about a piece of wood being sawed?", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "a toilet flushes and water sputters as it drains"], "sample_ids": ["tw76HGONaKg", "smGI3C1NZc"], "start_seconds": ["570", "30"], "properties": ["music, click, man", "water, drain, toilet"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the 
legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a toilet is flushed"], "question": "which entity is silent", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["uYT5gxnyMWM", "vbZ-0lGPneg"], "start_seconds": ["50", "30"], "properties": ["person, spray, yell", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "a man talks followed by a woman shouting"], "sample_ids": ["yeFvk9x0wWI", "s3cTDAj31g"], "start_seconds": ["30", "80"], "properties": ["clack, bird, chirp", "man, talk, woman"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", null], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking and a baby is crying"], "question": "which entity is a human", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "running water in a faucet with some clinks"], "sample_ids": ["su6FAOcOA8c", "zNRChLjqcU"], "start_seconds": ["4", "220"], "properties": 
["engine, idle, woman", "water, faucet, run"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", null], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "water is running from a faucet into a sink"], "question": "which entity is a source of water", "label": 1}, {"captions": ["small dogs yip and bark sharply", "a train horn blows as it passes by"], "sample_ids": ["v-wcQf4BDY0", "zVacuqSb4LI"], "start_seconds": ["120", "30"], "properties": ["bark, yip, sharply", "horn, blows, train"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a dog barks and growls", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp and a pop occurs before a man speaks", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["zuua6-5goWw", "sapQIQUhFc"], "start_seconds": ["30", "280"], "properties": ["sound, pop, bird", "liquid, flow, distance"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 
of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", null], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man is speaking and a stream is flowing in the background "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["motors rev and run loudly as a person laughs", "a man speaks uses a drill"], "sample_ids": ["zl9Dqx-j7q4", "x5eIC7S0fbg"], "start_seconds": ["6", "60"], "properties": ["motors rev, laugh, loudly", "A man is speaking, uses a drill, and is a tool"], "captions_pred_video": ["footage of a man driving a car in the dark", "a person in surgical gloves is using a needle to remove a small object from a tooth"], "captions_pred_audio": ["a jet engine roars ", "a man is speaking and using a power tool "], "question": "which entity is a tool", "label": 1}, {"captions": ["food is frying and sizzles", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["zNRChLjqcU", "xV7Mg1QucSc"], "start_seconds": ["220", "14"], "properties": ["food is frying, sizzles, food", "alarm, ticktocks, laughs"], "captions_pred_video": [null, "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["water is running from a faucet into a sink", "an alarm clock ticks and a woman laughs"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "pigeons vocalize and birds chirp"], "sample_ids": ["vlJS7LN2XyM", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["background, 
clocks, ticking", "vocalize, bird, chirp"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "of the pigeon in the cage"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "water pouring and bubbling"], "sample_ids": ["sZPuqDgX2V0", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["commentator, race, track", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an engine runs and a man speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["yT5WfYMRr-U", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["engine, run, man", "background, motor, run"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is 
speaking with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a man speaking while an engine runs?", "label": 0}, {"captions": ["a male speaks and another male speaks", "paper folding and crinkling"], "sample_ids": ["viuTg1M-dqg", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["two males, speaking, male", "paper, fold, crinkle"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of folding and crinkling paper?", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "a infant makes noise and is excited"], "sample_ids": ["vMf1dLD6Sng", "wIJK3-5y0kA"], "start_seconds": ["6", "30"], "properties": ["frog, bird, vocalize", "noise, excited, infant"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a frog croaks loudly", "a baby cries and a woman speaks"], 
"question": "which entity is making noise", "label": 1}, {"captions": ["male speech with light ticking", "multiple people speak and children yell while water gurgles"], "sample_ids": ["xO-Q2BlIIPU", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["male, speech, ticking", "multiple, people, yell"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "dishes cling together then a man begins to speak"], "sample_ids": ["sYITalLZjj4", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["water, rushes, background, birds", "cling, speak, dishes"], "captions_pred_video": ["two ducks are swimming in the water near each other", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["wind blows and birds chirp", "mechanisms are operating and water is splashing "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "a car revs and accelerates loudly and men and women chatter among themselves"], "sample_ids": ["yZrFNS7GFBQ", "y8dSeubCNI"], "start_seconds": ["30", "4"], "properties": ["pigeon, buzzes, insect", "men, women, car"], "captions_pred_video": ["of the bird in the cage", null], "captions_pred_audio": ["an owl hoots in the background ", "an engine revving and people talking in the background"], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "continuous sneezing together with speech"], "sample_ids": ["xzKKf9bKNUo", "x4dZyf9Gbj0"], "start_seconds": ["10", "130"], "properties": ["background, noise, snoring", "continuous, sneeze, 
speech"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "footage is blurry and out of focus"], "captions_pred_audio": ["a person snoring loudly", "a woman sneezes and speaks"], "question": "which entity is more like a sneeze", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a woman and man are speaking"], "sample_ids": ["tK4VlLsNxak", "vbpKkWvfOu4"], "start_seconds": ["120", "560"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "two people, speaking, woman, man"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a woman is speaking and a man is speaking"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a man speaks as a car is passing by"], "sample_ids": ["sSMl2vc3ek", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["a person, laughs, snores", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, 
{"captions": ["a man talks while a clock does ticktock", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["spYNpeN7rPY", "wDVMhEdTiVw"], "start_seconds": ["1", "30"], "properties": ["a clock, ticktock, man", "gun, shoot, water"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["birds chirp and objects are moved around", "water flows and trickles"], "sample_ids": ["yPUYU6t3rwo", "tB7hWb9gTuQ"], "start_seconds": ["370", "30"], "properties": ["birds chirp, objects are moved around, birds", "water, flow, trickle"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "the rocks on the beach are surrounded by water and the sky is visible in the 
background"], "captions_pred_audio": ["insects buzz and a man speaks", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["weDbePuc-Xc", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["cartoon character, music, vocalize", "clickety-clack, train, whistle"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["children speak as a female ask them questions", "children cheer as a man speaks then an audience screams"], "sample_ids": ["wEBlkGWVWwE", "vJvryTwuAV8"], "start_seconds": ["260", "16"], "properties": ["female, speak, questions", "audience, cheer, man"], "captions_pred_video": ["shows a person writing on the whiteboard", "a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking and a crowd is shouting and whooping "], "question": "which entity is a performance", "label": 1}, {"captions": ["women speak and laugh as wind blows", "people applaud and hoot and chat quietly"], "sample_ids": ["un9VQlzgZM", "wwyfGO2J4"], "start_seconds": ["5", "90"], "properties": ["wind, speak, laugh", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "people are clapping and speaking with background noise "], 
"question": "which entity is more active", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "some men converse over an engine running"], "sample_ids": ["sjlVMgdGSK0", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["accelerates, vehicle, race car", "men, converse, engine"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man speaks while a motorcycle revs and accelerates "], "question": "which is a still image", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["v5P-ThUCINM", "vlS6YMeWAPo"], "start_seconds": ["400", "40"], "properties": ["background, chirp, bird", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a goat bleats and birds chirp"], "question": "which entity has more birds", "label": 1}, {"captions": ["multiple insects buzz over rustling wind", "birds chirp as a train approaches"], "sample_ids": ["tMJne1a4AFI", "xM4joTqDVp4"], "start_seconds": ["0", "160"], "properties": ["wind, buzz, rustling", "bird, chirp, train"], "captions_pred_video": ["a swarm of bees on the ground", "footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack"], "captions_pred_audio": ["a swarm of bees buzzing around", "birds are chirping and a train is moving "], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "paper folding and crinkling"], "sample_ids": ["siJFXfGWgDk", "zPpG3RD8lSs"], "start_seconds": ["50", "20"], "properties": ["a, bird, vehicle", "paper, fold, crinkle"], "captions_pred_video": ["footage of a beekeeper working with bees 
in a beehive", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of folding and crinkling?", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a car speeding up in the distance"], "sample_ids": ["sAam2NqGhLY", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["snoring, breathing, child", "distance, car, speed"], "captions_pred_video": ["of a little girl sleeping on a couch", null], "captions_pred_audio": ["a person is snoring", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "water splashes as an animal walks through"], "sample_ids": ["uRExseg-0XI", "w1ir-sZ3Im8"], "start_seconds": ["210", "90"], "properties": ["woman, man, water", "animal, water, splashes"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "footage of a group of people riding horses through a 
river"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "water splashes and gurgles as people speak"], "question": "which entity is about water", "label": 1}, {"captions": ["a woman and man speak while food is frying", "someone is typing on a computer keyboard"], "sample_ids": ["zk-xJGQU8-4", "v0x1odnXtP0"], "start_seconds": ["130", "210"], "properties": ["food, man, woman", "keyboard, type, computer"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a person is typing on a keyboard"], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "someone is typing on a computer keyboard"], "sample_ids": ["wIvYjuR3nrg", "v0x1odnXtP0"], "start_seconds": ["9", "210"], "properties": ["birds, pigeons, vocalize", "keyboard, type, computer"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "how to make money on youtube in spanish"], "captions_pred_audio": ["birds are chirping and cooing", "a person is typing on a keyboard"], "question": "which entity is typing", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["sWZzXuWYY", "y2bVZ7rz-5M"], "start_seconds": ["420", "280"], "properties": ["male, speech, banging", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a warning", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "some tunes played by whistling"], "sample_ids": ["vbpKkWvfOu4", 
"u6BnG6YZqJ4"], "start_seconds": ["560", "0"], "properties": ["a, man, speaks", "tune, play, whistling"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "a person snores loudly multiple times at a close distance"], "sample_ids": ["u5RmF3c3Aw", "sSMl2vc3ek"], "start_seconds": ["60", "20"], "properties": ["engine, car, zoom", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vBslzh7saPw", "xfaoyyzw2WU"], "start_seconds": ["90", "180"], "properties": ["power, scream, increase", "loud, jet engine, roar"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a jet engine roars and accelerates ", "an aircraft engine roars and a man speaks "], "question": "which jet engine is louder", "label": 1}, {"captions": ["a man woman speak while crickets sing", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["zTLVJCo4WEE", "t97k0cejSQE"], "start_seconds": ["30", "250"], "properties": ["a, crickets, sing", "sound, chirp, buzz"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a bee on a purple thistle 
flower"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a bee buzzes and a woman speaks"], "question": "which entity has a higher pitch", "label": 1}, {"captions": ["some men converse over an engine running", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["sCiy7QS1U", "uEU-Hg5MTN8"], "start_seconds": ["300", "27"], "properties": ["men, converse, engine", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a joke", "label": 1}, {"captions": ["a motorcycle engine is idling", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vZAqdHZ81yA", "tDVADusiIoc"], "start_seconds": ["180", "60"], "properties": ["engine, motorcycle, idling", "water, radio, man"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["an engine is idling loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a motorcycle engine is idling", "a train horn blows as it passes by"], "sample_ids": ["vZAqdHZ81yA", "zVacuqSb4LI"], "start_seconds": ["180", "30"], "properties": ["engine, motorcycle, idling", "horn, blows, train"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by 
mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["an engine is idling loudly", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["a clock ticktocks in wind", "an aircraft engine runs as wind blows heavily"], "sample_ids": ["yVumC9TGknc", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["ticktocks, clock, wind", "engine, run, wind"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a series of beeps and chirps", "a jet engine roars and wind blows "], "question": "which object is moving in the wind", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a child speaks in closed space"], "sample_ids": ["tOSWIURC-4", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["engine, work, nearby", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a lawn mower is running ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "pigeons vocalize and birds chirp"], "sample_ids": ["zl9Dqx-j7q4", "uiS58TNyUiw"], "start_seconds": ["6", "430"], "properties": ["motors rev, laugh, loudly", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a man driving a car in the dark", "of the pigeon in the cage"], "captions_pred_audio": 
["a jet engine roars ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a jet engine spools up and takes off", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vBslzh7saPw", "zj2R0XoFr5k"], "start_seconds": ["90", "50"], "properties": ["engine, spools, takes", "airplane, boy, fly"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["xSKJGCItUWE", "su6FAOcOA8c"], "start_seconds": ["10", "4"], "properties": ["engine, run, boy", "engine, idle, woman"], "captions_pred_video": ["footage of the helicopter flying in the room", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a woman is speaking and a subway train is moving "], "question": "which entity has a running engine", "label": 0}, {"captions": ["a man speaks as horns blow", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["tHyNqRyK34A", "zFjIWfSD-4"], "start_seconds": ["24", "410"], "properties": ["a, man, speaks", "People, motor, brakes"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", null], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a toilet flushes and a female speaks", "an adult man speaks over glass clinking"], "sample_ids": 
["yaln9y8I7ms", "u6jIvCtKarQ"], "start_seconds": ["230", "70"], "properties": ["female, flushes, toilet", "a, man, speaks"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a person using a blender on a stove top"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a man is speaking and dishes are being moved with background noise "], "question": "which entity is a man speaking over glass clinking?", "label": 1}, {"captions": ["a person screams glaringly", "a man speaks as a car is passing by"], "sample_ids": ["xC8kbrKJmco", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["glaringly, screams, person", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a goat is bleating ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more passive", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a person snores loudly multiple times at a close distance"], "sample_ids": ["xKB8O8LTs6s", "sSMl2vc3ek"], "start_seconds": ["70", "20"], "properties": ["music, radio, gunshots", "loud, multiple, distance"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a person snoring loudly"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "rustling occurs, ducks quack and water splashes, followed by an adult female 
and adult male speaking and duck calls being blown"], "sample_ids": ["vmrxwuAMb2I", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["a dog, inhales, exhales", "rustling, ducks, quack"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", null], "captions_pred_audio": ["a dog barks and growls", "a duck quacks and a woman speaks"], "question": "which entity is about a dog?", "label": 0}, {"captions": ["a man speaks as a car is passing by", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["sK4u5T8hW78", "yks4cLgIDMc"], "start_seconds": ["30", "170"], "properties": ["a, car, pass", "background, speaking, child"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background?", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zk-xJGQU8-4", "uYT5gxnyMWM"], "start_seconds": ["130", "50"], "properties": ["food, man, woman", "female, spraying, scream"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a 
video of a woman and man speaking?", "label": 0}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "an engine runs loudly"], "sample_ids": ["w6RTHR6AeAg", "vqZuVbG6-HI"], "start_seconds": ["40", "130"], "properties": ["call, owl, screech", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "a person uses a saw to cut some wood"], "sample_ids": ["ylpYOorfH4o", "sHbXC6na9hg"], "start_seconds": ["410", "0"], "properties": ["engine, running, wind", "a person, saw, wood"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["a man is speaking and an engine is revving", "an engine is idling and vibrating"], "question": "which entity is stationary", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "vehicles pass by on a roadway"], "sample_ids": ["zofjfKhqLk8", "tgbONvsP47Y"], "start_seconds": ["10", "0"], "properties": ["noise, stop, motor", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of a fire truck 
entering a garage"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "water rushes and then a vehicle zooms past"], "sample_ids": ["ukxt9I7eMMg", "s4Uz1Ffgo04"], "start_seconds": ["30", "100"], "properties": ["food, pan, cook", "water, rushes, vehicle"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is moving", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "birds chirp and objects are moved around"], "sample_ids": ["ukg5L09Wpvo", "yPUYU6t3rwo"], "start_seconds": ["150", "370"], "properties": ["clickety-clack, train, whistle", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["children cry and people talk", "an insect buzzes around continuously"], "sample_ids": ["xLwHe825Zs", "v25l1jef3JY"], "start_seconds": ["18", "0"], "properties": ["people talk, children cry, people talk", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a baby cries and a woman speaks", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["an animal bleats and cries out and 
metal bangs", "a frog croaks as other frogs croak in the background"], "sample_ids": ["xfudFO976zE", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["animal, bleats, cry", "background, frog, croak"], "captions_pred_video": ["footage is blurry and shaky", "a close up of a frog in the water"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a frog is croaking"], "question": "which animal is more likely to be a frog?", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["y2ZBGpgbhHM", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["dog, chirp, breathe", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["birds chirping and a dog panting", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "wind blowing followed by a zoom"], "sample_ids": ["yNtRmrn0io8", "vr8ZXjEBhMQ"], "start_seconds": ["210", "150"], "properties": ["storm, distance, strike", "wind, blow, zoom"], "captions_pred_video": ["footage of a house in the middle of the night", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["rain falls and thunder roars", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to cause damage", "label": 0}, {"captions": ["a man speaks as horns blow", "a car accelerates and wind blows"], "sample_ids": ["tHyNqRyK34A", "u0TrcHhkPQ"], "start_seconds": ["24", "20"], "properties": ["a, man, speaks", "accelerates, wind, blows"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", null], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a race car accelerates 
and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tiDFTC-5vU", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["male, duck, laugh", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sfAvvZwdLCY", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["flushes, drains, water", "music, gunfire, explosion"], "captions_pred_video": ["footage of the toilet in the bathroom", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a toilet is flushed", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a clock ticktocks"], "sample_ids": ["spYNpeN7rPY", "v-g-j2uTByM"], "start_seconds": ["1", "30"], "properties": ["a clock, ticktock, man", "ticktocks, clock, ticktocks"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a clock is ticking loudly"], "question": "which entity has a clock ticktocks?", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a duck quacks loudly and continuously"], "sample_ids": ["vZAw4apG0Es", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["background, tick, repeat", "loud, continuous, quacks"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a clock is ticking and people are talking", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "an engine runs loudly"], "sample_ids": ["uiItxDsDMFI", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["wood, piece, saw", "loud, engine, run"], "captions_pred_video": ["a man 
cutting a log with an axe in the woods", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a saw is being used with background noise ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["se87d6yxEOA", "vbZ-0lGPneg"], "start_seconds": ["10", "30"], "properties": ["run, whistle, pass", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a moving object", "label": 0}, {"captions": ["a man speaks as he moves silverware in a bowl", "small dogs yip and bark sharply"], "sample_ids": ["x6ijhqRY38s", "v-wcQf4BDY0"], "start_seconds": ["250", "120"], "properties": ["bowl, silverware, man", "bark, yip, sharply"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vzxHnu-SFEw", "zl9Dqx-j7q4"], "start_seconds": ["80", "6"], "properties": ["two objects, woman, speak", "engine, laugh, loud"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with 
scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "an small aircraft engine runs and a boy speaks"], "sample_ids": ["yJ0TePmaOo", "xSKJGCItUWE"], "start_seconds": ["390", "10"], "properties": ["two hard objects, man, speak", "engine, run, boy"], "captions_pred_video": [null, "footage of the helicopter flying in the room"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a high pitched engine is running and a child speaks"], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "a woman speaks and other women and a man talk with her"], "sample_ids": ["w2JXXIAdUdg", "vbpKkWvfOu4"], "start_seconds": ["10", "560"], "properties": ["emits, sleeping, person", "a, woman, man"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["a stream of water flows quickly", "winds blows roughly as a vehicle races past"], "sample_ids": ["wbHTKEJZyhc", "xjvTpk2Zpr8"], "start_seconds": ["20", "70"], "properties": ["stream, water, flow", "wind, blows, vehicle"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a jet engine roars and wind blows "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["zCrAfDfv6-A", "uYT5gxnyMWM"], "start_seconds": ["30", 
"50"], "properties": ["person, mouse, click", "a, scream, girl"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person whistles a song", "a woman is speaking and a baby is crying"], "question": "which entity is a person?", "label": 0}, {"captions": ["a sleeping person emits a gravely snore", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["w2JXXIAdUdg", "uZesmtKZGSw"], "start_seconds": ["10", "250"], "properties": ["emits, sleeping, person", "men, talk, cars"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a woman talking as an infant is crying"], "sample_ids": ["xKB8O8LTs6s", "tMbMDvT50j8"], "start_seconds": ["70", "12"], "properties": ["music, gunfire, explosion", "a, talk, infant"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a 
baby cries and a woman speaks"], "question": "which entity is more calm", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "water flows as men speak and yell"], "sample_ids": ["ukxt9I7eMMg", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["food, pan, cook", "water, flow, men"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a man laughs and speaks as cats purr and hiss", "small dogs yip and bark sharply"], "sample_ids": ["vVhthZ45k3Y", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["cat, purr, hiss", "bark, yip, sharply"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a dog barks and growls"], "question": "which animal is more playful", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a vehicle engine accelerating then running on idle"], "sample_ids": ["zcDwZ6W7E3E", "vYkA3cfXp5Q"], "start_seconds": ["180", "30"], "properties": ["man, speak, motorcycles", "engine, accelerate, idle"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a drill runs and two people laugh", "a man speaks as a car is passing by"], "sample_ids": ["tEE3MpBt1sg", "sK4u5T8hW78"], "start_seconds": ["50", "30"], 
"properties": ["two people, laugh, drill", "a, car, pass"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking to a car passing by?", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "an infant crying frantically"], "sample_ids": ["uZesmtKZGSw", "zwOBqeFTgiU"], "start_seconds": ["250", "30"], "properties": ["men, talk, cars", "cry, infant, frantically"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male 
speaking and duck calls being blown"], "sample_ids": ["xl2PIWyXaM", "vfYTJq7nU"], "start_seconds": ["160", "130"], "properties": ["chirp, man, younger person", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and people are talking", "a duck quacks and a woman speaks"], "question": "which entity has more animals", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["x5cuQjOdM3E", "rqu8iB22IY"], "start_seconds": ["30", "5"], "properties": ["cat, meows, young woman", "sound, repeats, laugh"], "captions_pred_video": ["a black background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "a dog barks and a man speaks while music plays "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a large bell chimes back and forth loudly", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["w2M4i1mklOA", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["loud, chime, bell", "a, scream, girl"], "captions_pred_video": ["footage of an antique clock", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a woman is speaking and a baby is crying"], "question": "which entity is quieter", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "a man speaks as a car is passing by"], "sample_ids": ["yeFvk9x0wWI", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["chirp, twitter, clatter", "a, car, pass"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is silent", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "someone whistles a tune"], "sample_ids": ["wztCSUxOf8", "sIXTftIuUgw"], "start_seconds": ["130", "90"], "properties": ["a crowd, yells, applauds", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a person whistling a song"], "question": "which entity is more quiet", "label": 1}, {"captions": ["vehicles pass by on a roadway", "birds chirp and objects are moved around"], "sample_ids": ["tgbONvsP47Y", "yPUYU6t3rwo"], "start_seconds": ["0", "370"], "properties": ["pass, vehicle, roadway", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a fire truck entering a garage", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a car is driving on the road ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a toilet flushes and water drains", "a person is burping then speaks and laughs"], "sample_ids": ["sfAvvZwdLCY", "wAAkbZToh8"], "start_seconds": ["20", "0"], "properties": ["water drains, flushes, water", "burp, laugh, speak"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a man burps and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a child speaks in closed space", "wind blowing followed by a zoom"], "sample_ids": ["yW6FWLSLkx4", "vr8ZXjEBhMQ"], "start_seconds": ["40", "150"], "properties": ["child, space, speak", "wind, blow, zoom"], 
"captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to blow", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "someone is typing on a computer keyboard"], "sample_ids": ["tw76HGONaKg", "v0x1odnXtP0"], "start_seconds": ["570", "210"], "properties": ["A, game, keyboard", "keyboard, type, computer"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a person is typing on a keyboard"], "question": "which keyboard is used to type on a computer", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "water splashes and a door squeaks"], "sample_ids": ["sapQIQUhFc", "sdXV-ylviw"], "start_seconds": ["280", "190"], "properties": ["water, trickles, flow", "sound, splash, door"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is 
speaking and a stream is flowing in the background ", "a dog barks and taps with background noise "], "question": "which entity has a door that squeaks?", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "winds blows roughly as a vehicle races past"], "sample_ids": ["wqADXCzngMw", "xjvTpk2Zpr8"], "start_seconds": ["340", "70"], "properties": ["engine, idle, man", "wind, blows, vehicle"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a propeller rotates loudly and intensely"], "sample_ids": ["ugHJF0hfYkg", "ugHJF0hfYkg"], "start_seconds": ["10", "10"], "properties": ["loud, propeller, move", "loud, intense, propeller"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a helicopter is flying overhead ", "a helicopter is flying overhead "], "question": "which propeller is louder", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "an engine runs loudly"], "sample_ids": ["tZGN5a7ybxo", "vqZuVbG6-HI"], "start_seconds": ["60", "130"], "properties": ["ring, train, horn", "loud, engine, run"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a lawn mower is running and men are speaking "], "question": "which train is louder", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zY3icUyMdh8", "yajyRTUQk3U"], "start_seconds": 
["20", "400"], "properties": ["dog, bark, engine", "a woman, something, fried"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a person snoring several times"], "sample_ids": ["yYEVLuqEytU", "spJCm8tD9Zo"], "start_seconds": ["40", "90"], "properties": ["animal, pig, background", "snore, person, several"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a person is snoring loudly"], "question": "which entity is a person?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "two women and a man talk while a kid cries"], "sample_ids": ["vimzuGQvdcU", "wyllXV6PjKo"], "start_seconds": ["30", "30"], "properties": ["a, man, yells", "a kid, talk, cry"], "captions_pred_video": ["a group of people are rafting down a river", null], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a woman speaks and a baby cries"], "question": "which entity has a kid?", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a woman speaks happily and an animal chirps"], "sample_ids": ["vddP56-ogds", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["water, flow, laugh", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a woman is speaking and a dog is barking "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["television 
program is played far away while a woman talks and birds tweet nearby", "a toilet flushes and water drains unevenly"], "sample_ids": ["vbZ-0lGPneg", "vhJWZheqaE"], "start_seconds": ["30", "0"], "properties": ["a woman, a television program, a bird", "water drains unevenly, toilet flushes, water drains"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a toilet is flushed"], "question": "which entity has more water", "label": 1}, {"captions": ["a male speaks and another male speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["viuTg1M-dqg", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["two males, speaking, male", "loud, multiple, distance"], "captions_pred_video": ["footage of water coming out of a hole in the ground", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tEE3MpBt1sg", "xfaoyyzw2WU"], "start_seconds": ["50", "180"], "properties": ["drill, something, laugh", "loud, jet engine, roar"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "a toilet flushes and a female speaks"], "sample_ids": ["y8dSeubCNI", "yaln9y8I7ms"], "start_seconds": ["4", "230"], "properties": ["engine revving, people speaking, motorcycle", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], 
"captions_pred_audio": ["an engine revving and people talking in the background", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["a man is filing a hard object", "a train horn blows as it passes by"], "sample_ids": ["vveS8HT7Uog", "zVacuqSb4LI"], "start_seconds": ["100", "30"], "properties": ["a man, hard, object", "horn, blows, train"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which object is louder", "label": 0}, {"captions": ["a person sneezes followed by another person speaking", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["t8CV69hcvF0", "uZesmtKZGSw"], "start_seconds": ["210", "250"], "properties": ["person, sneeze, follow", "men, talk, cars"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro 
gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a infant makes noise and is excited"], "sample_ids": ["zkKdxzNC97Y", "wIJK3-5y0kA"], "start_seconds": ["27", "30"], "properties": ["loud, bang, noise", "noise, excited, infant"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a door is opened and closed", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a man speaks as a car is passing by"], "sample_ids": ["su6FAOcOA8c", "sK4u5T8hW78"], "start_seconds": ["4", "30"], "properties": ["engine, idle, woman", "a, car, pass"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking with background 
noise and breathing sounds "], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["distant men speak as a spray can nozzle is depressed", "an infant crying as a woman laughs"], "sample_ids": ["rwtmaKiCcQU", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["nozzle, depressed, spray can", "a, laugh, infant"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["spraying and people speaking", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "water flows and trickles"], "sample_ids": ["yajyRTUQk3U", "tB7hWb9gTuQ"], "start_seconds": ["400", "30"], "properties": ["noise, woman, speak", "water, flow, trickle"], "captions_pred_video": ["- a woman cooking in the kitchen", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a small engine idles continuously", "vehicles pass by on a roadway"], "sample_ids": ["y5WII6cTH7k", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["engine, idle, continuously", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "footage of a fire truck entering a garage"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vddP56-ogds", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["water, flow, laugh", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["water 
is running and gurgling and a man is speaking", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "pigeons vocalize and birds chirp"], "sample_ids": ["yDoT73BWsdA", "uiS58TNyUiw"], "start_seconds": ["10", "430"], "properties": ["engine, revs, vehicle", "vocalize, bird, chirp"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "of the pigeon in the cage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["an engine runs loudly", "water pouring and bubbling"], "sample_ids": ["vqZuVbG6-HI", "uyRfq-jKPpo"], "start_seconds": ["130", "50"], "properties": ["loud, engine, run", "water, bubbles, pouring"], "captions_pred_video": ["footage is blurry because it's raining outside", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "water is running from a faucet"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": 
["shmR4OZtzqA", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["man, engine, idle", "animal, grunts, snorts"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man speaks while a motor runs", "a woman is speaking and a baby is crying"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a helicopter engine runs continuously", "a infant makes noise and is excited"], "sample_ids": ["ugHJF0hfYkg", "wIJK3-5y0kA"], "start_seconds": ["10", "30"], "properties": ["engine, running, continuously", "noise, excited, infant"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a helicopter is flying overhead ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a infant makes noise and is excited"], "sample_ids": ["zl9Dqx-j7q4", "wIJK3-5y0kA"], "start_seconds": ["6", "30"], "properties": ["motors rev, laugh, loudly", "noise, excited, infant"], "captions_pred_video": ["footage of a man driving a car in the dark", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a jet 
engine roars ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "a machine beeps continuously"], "sample_ids": ["sQwlkXjQabo", "y682ml90jGw"], "start_seconds": ["10", "11"], "properties": ["liquid, surface, spray", "beeps, machine, continuously"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", null], "captions_pred_audio": ["spraying followed by silence", "a beeping sound is being made "], "question": "which entity is not silent", "label": 1}, {"captions": ["a person whistles a meandering tune", "a man speaks as a motor runs in the background"], "sample_ids": ["uFoga8sHpiw", "xZepNM9qcRA"], "start_seconds": ["90", "30"], "properties": ["person, tune, whistle", "background, motor, run"], "captions_pred_video": ["footage of a bird in a cage", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a person whistles a song", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 0}, {"captions": ["a dog whimpers and a woman briefly talks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["y1saVTXsKwc", "vb1fPSDI4c"], "start_seconds": ["80", "30"], "properties": ["a, dog, talk", "multiple, people, yell"], "captions_pred_video": ["a dog playing with a pink ball", null], "captions_pred_audio": ["a dog barks and a man speaks", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a clock ticktocks briefly", "a duck quacks continuously"], "sample_ids": ["u7C-AEBQM", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks briefly", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a ticktock of a clock", "a duck is quacking 
loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["xfaoyyzw2WU", "wqZ135Ssz0"], "start_seconds": ["180", "60"], "properties": ["loud, jet engine, roar", "two men, woman, birds"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", null], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which is quieter", "label": 1}, {"captions": ["dogs barking and whimpering", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["tIY7qOV3rEM", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["barking, whimpering, dog", "a, scream, girl"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a woman is speaking and a baby is crying"], "question": "which entity is a human", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tDVADusiIoc", "vfYTJq7nU"], "start_seconds": ["60", "130"], "properties": ["man, radio, blows", "rustling, ducks, quack"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a duck quacks and a woman speaks"], "question": "which entity is about a man speaking on a radio as wind blows?", "label": 0}, {"captions": ["vehicles pass by on a roadway", "a machine beeps continuously"], "sample_ids": ["tgbONvsP47Y", "y682ml90jGw"], "start_seconds": ["0", "11"], "properties": ["pass, vehicle, roadway", 
"beeps, machine, continuously"], "captions_pred_video": ["footage of a fire truck entering a garage", null], "captions_pred_audio": ["a car is driving on the road ", "a beeping sound is being made "], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "an infant crying frantically"], "sample_ids": ["tEE3MpBt1sg", "zwOBqeFTgiU"], "start_seconds": ["50", "30"], "properties": ["drill, something, laugh", "cry, infant, frantically"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "of the baby crying in the car seat"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "several insects fly while two men talk"], "sample_ids": ["s7knHCFW82w", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["blow horn, get close, train", "several, fly, men"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a moving object", "label": 0}, {"captions": ["continuous sizzling with a woman speaking towards the end", "some men converse over an engine running"], "sample_ids": ["ukxt9I7eMMg", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["continuous, woman, speaking", "men, converse, engine"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which 
entity is a video of a woman speaking?", "label": 0}, {"captions": ["paper folding and crinkling", "a toilet flushes and water sputters as it drains"], "sample_ids": ["zPpG3RD8lSs", "smGI3C1NZc"], "start_seconds": ["20", "30"], "properties": ["paper, fold, crinkle", "water, drain, toilet"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", null], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a car accelerates and wind blows"], "sample_ids": ["u21-Z5gJCB8", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["background, voice, man", "accelerates, wind, blows"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a child speaks", "vehicles pass by on a roadway"], "sample_ids": ["yW6FWLSLkx4", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["a, child, 
speaks", "pass, vehicle, roadway"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a goat screams and people speak in the background", "people speak as gunfire rings out"], "sample_ids": ["xC8kbrKJmco", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["background, goat, scream", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a goat is bleating ", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["people clap and speak in the distance", "a car accelerates and wind blows"], "sample_ids": ["wwyfGO2J4", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["clap, distance, speak", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "pigeons vocalize and birds chirp"], "sample_ids": ["xKB8O8LTs6s", "uiS58TNyUiw"], "start_seconds": ["70", "430"], "properties": ["music, radio, gunshots", "vocalize, bird, chirp"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "of the pigeon in the cage"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a car speeding up in the distance", "paper folding and crinkling"], "sample_ids": ["u0TrcHhkPQ", "zPpG3RD8lSs"], 
"start_seconds": ["20", "20"], "properties": ["distance, car, speed", "paper, fold, crinkle"], "captions_pred_video": [null, "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "the wind blows and a mouse clicks "], "question": "which is not a car", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["tK4VlLsNxak", "xKB8O8LTs6s"], "start_seconds": ["120", "70"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "music, gunfire, explosion"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "a clock ticktocks"], "sample_ids": ["wvKpEYswXO0", "v-g-j2uTByM"], "start_seconds": ["150", "30"], "properties": ["plastic, tap, speak", "ticktocks, 
clock, ticktocks"], "captions_pred_video": ["of the person preparing food in the kitchen", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a group of people chatter and talk as multiple horns honk in the background"], "sample_ids": ["ziUT9IFTkjg", "yLy-WycbVVE"], "start_seconds": ["10", "30"], "properties": ["background, birds, rustling", "background, people, talk"], "captions_pred_video": [null, "a soccer field in a stadium with yellow and red seats"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking and a church bell is ringing with wind noise in the background "], "question": "which entity has a background of birds chirping?", "label": 0}, {"captions": ["someone snores nearby", "pigeons vocalize and birds chirp"], "sample_ids": ["spJCm8tD9Zo", "uiS58TNyUiw"], "start_seconds": ["90", "430"], "properties": ["someone snores, nearby, someone", "vocalize, bird, chirp"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of the pigeon in the cage"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "a clock ticktocks"], "sample_ids": ["vf44CgrjT0A", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["loud, long, person", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a loud burp", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a vehicle 
accelerates before a race car idles then accelerates quickly", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sjlVMgdGSK0", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["accelerates, vehicle, race car", "harsh, wind, blows"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking with wind noise in the background "], "question": "which entity is a moving object", "label": 0}, {"captions": ["small dogs growl, bark and yip.", "a car accelerates and wind blows"], "sample_ids": ["sShpyu2l4YQ", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["growl, bark, yip", "accelerates, wind, blows"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "a clock ticktocks"], "sample_ids": ["y8dSeubCNI", "v-g-j2uTByM"], "start_seconds": ["4", "30"], "properties": ["engine revving, people speaking, motorcycle", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["an engine revving and people talking in the background", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "water flows as men speak and yell"], "sample_ids": ["tK4VlLsNxak", "vJ7JPEFhyLA"], "start_seconds": ["120", "16"], "properties": ["a, dial, telephone", "water, flow, men"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and using a 
sewing machine", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking and dialing a rotary telephone?", "label": 0}, {"captions": ["a person speaks briefly", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zOZleIRqZm4", "vfYTJq7nU"], "start_seconds": ["80", "130"], "properties": ["person, talk, brief", "rustling, ducks, quack"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a duck quacks and a woman speaks"], "question": "which entity is about a person talking?", "label": 0}, {"captions": ["a man sprays as a scraping occurs in the background", "winds blows roughly as a vehicle races past"], "sample_ids": ["sOa7g-44Dag", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["background, man, spray", "wind, blows, vehicle"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sxYkFKFIZD0", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["screech, man, door", "three men, wind, flow"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 
audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about liquid flowing?", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "a child speaks in closed space"], "sample_ids": ["spJCm8tD9Zo", "yW6FWLSLkx4"], "start_seconds": ["90", "40"], "properties": ["snores, wheezes, sleeps", "child, space, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vb1fPSDI4c", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["multiple, people, yell", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a woman is speaking while food is frying in the background"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a car accelerates and wind blows"], "sample_ids": ["yRx9txMcBl0", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["motors, tires, screech", "accelerates, wind, blows"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, 
gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a race car accelerates and revs its engine "], "question": "which entity is a car?", "label": 1}, {"captions": ["a dog barks and whimpers", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sShpyu2l4YQ", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["barks, whimpers, dog", "engine, laugh, loud"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a dog is barking and growling", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a toilet flushes and water drains", "a child speaks in closed space"], "sample_ids": ["sfAvvZwdLCY", "yW6FWLSLkx4"], "start_seconds": ["20", "40"], "properties": ["water drains, flushes, water", "child, space, speak"], "captions_pred_video": ["footage of the toilet in the bathroom", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks followed by clicks and scraping", "a stream of water flows as people talk and wind blows"], "sample_ids": ["yYJksgsxx5U", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["audio, clicks, scraping", "stream, water, flow"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a man is 
speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["white noise and birds chirping", "birds chirp and objects are moved around"], "sample_ids": ["wRBHTgrbiwg", "yPUYU6t3rwo"], "start_seconds": ["50", "370"], "properties": ["noise, white, chirping", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "insects buzz and a man speaks"], "question": "which entity is more like a video", "label": 1}, {"captions": ["water flows and trickles", "a car speeding up in the distance"], "sample_ids": ["tB7hWb9gTuQ", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["water, flow, trickle", "distance, car, speed"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", null], "captions_pred_audio": ["water is splashing and gurgling", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a car accelerates and wind blows", "water is sprayed across a hard surface"], "sample_ids": ["u0TrcHhkPQ", "sQwlkXjQabo"], "start_seconds": ["20", "10"], "properties": ["accelerates, wind, blows", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a car accelerates and wind blows"], "sample_ids": ["xfudFO976zE", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["animal, bleats, cry", "accelerates, wind, blows"], "captions_pred_video": ["footage is blurry and shaky", null], 
"captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a woman speaks as she rubs two objects together"], "sample_ids": ["xyL9F5VrjkE", "vzxHnu-SFEw"], "start_seconds": ["20", "80"], "properties": ["wind, motor, distance", "two objects, woman, speak"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a motor?", "label": 0}, {"captions": ["a woman speaks and then a man speaks", "water flows and trickles"], "sample_ids": ["vbpKkWvfOu4", "tB7hWb9gTuQ"], "start_seconds": ["560", "30"], "properties": ["a, man, speaks", "water, flow, trickle"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a child speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["yW6FWLSLkx4", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["a, child, speaks", "rustling, ducks, quack"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", null], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a duck quacks and a woman speaks"], "question": "which entity is about a child speaking?", "label": 0}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a car accelerates and wind blows"], "sample_ids": ["tDlfY3nmx1A", "u0TrcHhkPQ"], "start_seconds": ["160", "20"], "properties": ["applause, laugh, man", "accelerates, wind, blows"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", null], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a woman speaks happily and an animal chirps"], "sample_ids": ["zTLVJCo4WEE", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["a, crickets, sing", "a woman, chirps, animal"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and crickets chirp", "a woman is speaking and a dog is barking "], "question": "which entity has a man 
and woman speaking?", "label": 0}, {"captions": ["long loud burping by a man", "a man speaks as a car is passing by"], "sample_ids": ["xmiUIOhtZyQ", "sK4u5T8hW78"], "start_seconds": ["60", "30"], "properties": ["loud, burp, man", "a, car, pass"], "captions_pred_video": ["homer simpson drinking a beer", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person burps and music plays in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a propeller rotates loudly and intensely"], "sample_ids": ["sfAvvZwdLCY", "ugHJF0hfYkg"], "start_seconds": ["20", "10"], "properties": ["water drains, flushes, water", "loud, intense, propeller"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a toilet is flushed", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a male speaks over some small clicks", "small dogs yip and bark sharply"], "sample_ids": ["uXxVebHsGZ8", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["male, clicks, speak", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on 
an intercom", "an infant crying as a woman laughs"], "sample_ids": ["sZPuqDgX2V0", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["engine, accelerate, intercom", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yajyRTUQk3U", "vbZ-0lGPneg"], "start_seconds": ["400", "30"], "properties": ["noise, woman, speak", "a woman, a television program, a bird"], "captions_pred_video": ["- a woman cooking in the kitchen", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a woman is speaking and a dog is whimpering"], "question": "which woman is speaking over sizzling noise", "label": 0}, {"captions": ["a young woman speaks over spraying and another person yells", "three men talk while wind blows and some liquid flows"], "sample_ids": ["uYT5gxnyMWM", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["person, spray, yell", "three men, wind, flow"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a person speaking over spraying and another person yelling?", "label": 0}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a vehicle accelerates squealing tires"], "sample_ids": ["weDbePuc-Xc", "sd7xVssqlw"], "start_seconds": ["40", "50"], "properties": ["music, slaps, human", "accelerates, tires, squealing"], 
"captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", null], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "a toilet flushes and a female speaks"], "sample_ids": ["vs65y4qmyBE", "yaln9y8I7ms"], "start_seconds": ["340", "230"], "properties": ["wind, blows, strongly", "female, flushes, toilet"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage is blurry and out of focus"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a toilet flushes and a man speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "winds blows roughly as a vehicle races past"], "sample_ids": ["zcDwZ6W7E3E", "xjvTpk2Zpr8"], "start_seconds": ["180", "70"], "properties": ["a, man, speak", "wind, blows, vehicle"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a jet engine roars and wind blows "], "question": "which entity is about a vehicle racing past?", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "several insects fly while two men talk"], "sample_ids": ["ylpYOorfH4o", "s-T9OVOiMLo"], "start_seconds": ["410", "330"], "properties": ["engine, running, wind", "several, fly, men"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube 
how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more flying insects", "label": 1}, {"captions": ["some men converse over an engine running", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sCiy7QS1U", "wz7N8YRy74I"], "start_seconds": ["300", "30"], "properties": ["men, converse, engine", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "vehicles pass by on a roadway"], "sample_ids": ["zVacuqSb4LI", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["blares, fades, train", "pass, vehicle, roadway"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "water splashes and a door squeaks"], "sample_ids": ["vb1fPSDI4c", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["multiple, people, yell", "sound, splash, door"], "captions_pred_video": [null, null], "captions_pred_audio": ["a crowd of people are talking and laughing", "a dog barks and taps with background noise "], "question": "which entity has a door squeaking?", "label": 1}, {"captions": ["a man speaks as a machine runs", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vD6lYD1l0BY", "vbZ-0lGPneg"], "start_seconds": ["330", "30"], "properties": ["a, machine, run", "a woman, a television program, a bird"], "captions_pred_video": ["game controller being held in the hands of the person", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a weapon fires multiple times"], "sample_ids": ["zj2R0XoFr5k", "sMC07Ucy7kg"], "start_seconds": ["50", "10"], "properties": ["airplane, boy, fly", "weapon, fire, multiple"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage is from a car's point of view"], "captions_pred_audio": ["a woman speaks 
while a helicopter flies overhead ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["water splashes as an animal walks through", "water splashes and a door squeaks"], "sample_ids": ["w1ir-sZ3Im8", "sdXV-ylviw"], "start_seconds": ["90", "190"], "properties": ["animal, water, splashes", "sound, splash, door"], "captions_pred_video": ["footage of a group of people riding horses through a river", null], "captions_pred_audio": ["water splashes and gurgles as people speak", "a dog barks and taps with background noise "], "question": "which entity is more likely to be a video of a door squeaking?", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "an infant crying as a woman laughs"], "sample_ids": ["tZGN5a7ybxo", "xhmRY9yhC7c"], "start_seconds": ["60", "20"], "properties": ["ring, train, horn", "a, laugh, infant"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["t25U-v4k4ts", "w34HjHr6gAY"], "start_seconds": ["40", "30"], "properties": ["a, chirps, bird", "beeps, hit, woman"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 
1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a machine engine runs and a man speaks"], "sample_ids": ["vr8ZXjEBhMQ", "vs65y4qmyBE"], "start_seconds": ["150", "340"], "properties": ["wind, blow, zoom", "engine, run, man"], "captions_pred_video": ["is taken from a motorcycle's point of view", "a car is engulfed in flames on the side of the road"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a heavy engine is running and men are speaking "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["water pouring and bubbling", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["uyRfq-jKPpo", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["water, bubbles, pouring", "a woman, laughs, animal"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["water is running from a faucet", "a woman is speaking and a baby is crying"], "question": "which entity is a video", 
"label": 1}, {"captions": ["a person is snoring while sleeping", "a woman speaks and taps on a hard surface before running tap water"], "sample_ids": ["vJrjSeP17yE", "wvKpEYswXO0"], "start_seconds": ["40", "150"], "properties": ["a person is sleeping, snoring, person", "water, tap, run"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is a person", "label": 0}, {"captions": ["a crowd yells, reacts and applauds", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wztCSUxOf8", "ukg5L09Wpvo"], "start_seconds": ["130", "150"], "properties": ["a crowd, yells, applauds", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a train blows its whistle and blows its horn "], "question": "which entity is quieter", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "some tunes played by whistling"], "sample_ids": ["w2JXXIAdUdg", "u6BnG6YZqJ4"], "start_seconds": ["10", "0"], "properties": ["snoring, distance, person", "tune, play, whistling"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a person whistling a song"], "question": "which entity is played by whistling", "label": 1}, {"captions": ["an infant crying as a woman laughs", "a person is whistling"], "sample_ids": ["xhmRY9yhC7c", "sIXTftIuUgw"], "start_seconds": ["20", "90"], "properties": ["a, laugh, infant", "person, whistling, person"], 
"captions_pred_video": ["of a baby crying in a baby bouncer", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a person whistling a song"], "question": "which person is whistling", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["uWAAAL4CIoc", "wDVMhEdTiVw"], "start_seconds": ["0", "30"], "properties": ["a woman, chirps, animal", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause harm", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "a stream of water runs briefly"], "sample_ids": ["xzKKf9bKNUo", "x-PeY8Yb8M4"], "start_seconds": ["10", "300"], "properties": ["background, noise, snoring", "stream, water, run"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a person snoring loudly", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "some men converse over an engine running"], "sample_ids": ["zY3icUyMdh8", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["dog, bark, engine", "men, converse, engine"], "captions_pred_video": ["footage of a bus driving through a residential street at night", null], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a dog barking and a vehicle engine idling followed shortly by vehicle engine revving?", "label": 0}, {"captions": ["a woman and man speak 
while food is frying", "a train horn blows as it passes by"], "sample_ids": ["zk-xJGQU8-4", "zVacuqSb4LI"], "start_seconds": ["130", "30"], "properties": ["food, man, woman", "horn, blows, train"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train", "label": 1}, {"captions": ["continuous snoring", "birds chirp and objects are moved around"], "sample_ids": ["sLkeqCDJIyw", "yPUYU6t3rwo"], "start_seconds": ["120", "370"], "properties": ["loud, snoring, noise", "birds chirp, objects are moved around, birds"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a person is snoring loudly", "insects buzz and a man speaks"], "question": "which entity is quieter", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["u2f5NpsoHBg", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["person, laugh, clap", "a woman, something, fried"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "a telephone rings followed by a woman talking"], "sample_ids": ["yYJksgsxx5U", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["audio, woman, silverware", "ring, talk, woman"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", null], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a dial tone sounds followed by a woman speaking"], "question": "which woman is talking", "label": 1}, {"captions": ["food is frying then a woman speaks", "a toilet flushes and a female speaks"], "sample_ids": ["ukxt9I7eMMg", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["food, woman, speak", "female, flushes, toilet"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a toilet flushes and a man speaks"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a bus engine 
idles while a woman speaks making an announcement"], "sample_ids": ["wnpJndXuxLc", "su6FAOcOA8c"], "start_seconds": ["50", "4"], "properties": ["blows, vehicle, train", "engine, idle, woman"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "water pouring and bubbling"], "sample_ids": ["y8dSeubCNI", "uyRfq-jKPpo"], "start_seconds": ["4", "50"], "properties": ["engine revving, people speaking, motorcycle", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["an engine revving and people talking in the background", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks while water drains", "a toilet flushes and a female speaks"], "sample_ids": ["vSeGhaZt-aI", "yaln9y8I7ms"], "start_seconds": ["50", "230"], "properties": ["water, drain, man", "female, flushes, toilet"], 
"captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["a person speaks briefly", "an infant crying frantically"], "sample_ids": ["zOZleIRqZm4", "zwOBqeFTgiU"], "start_seconds": ["80", "30"], "properties": ["person, talk, brief", "cry, infant, frantically"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["w0xsN8X18Y", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["rain, thunder, surface", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is about a man speaking and a rooster crows?", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "a woman speaks as she rubs two objects together"], "sample_ids": ["vz8868znkVQ", "vzxHnu-SFEw"], "start_seconds": ["60", "80"], "properties": ["audio, click, kid speaking", "two objects, woman, speak"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper 
cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a baby is laughing and breathing with background noise ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["u--KhUW8l1Y", "wDVMhEdTiVw"], "start_seconds": ["0", "30"], "properties": ["horn, siren, life", "gun, shoot, water"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a gun", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "water is sprayed across a hard surface"], "sample_ids": ["ul60S8TXDA8", "sQwlkXjQabo"], "start_seconds": ["60", "10"], "properties": ["sound, distance, bell", "water, spray, surface"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "spraying followed by silence"], "question": "which entity 
is a liquid", "label": 1}, {"captions": ["some tunes played by whistling", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["u6BnG6YZqJ4", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["tune, play, whistling", "female, spraying, scream"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person whistling a song", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "water pouring and bubbling"], "sample_ids": ["uZesmtKZGSw", "uyRfq-jKPpo"], "start_seconds": ["250", "50"], "properties": ["men, talk, cars", "water, bubbles, pouring"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how 
to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "paper folding and crinkling"], "sample_ids": ["yZmhM1HcsyE", "zPpG3RD8lSs"], "start_seconds": ["4", "20"], "properties": ["engine, roar, water", "paper, fold, crinkle"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "the wind blows and a mouse clicks "], "question": "which entity is more likely to be a paper craft", "label": 1}, {"captions": ["a male speaks over some small clicks", "a woman speaks happily and an animal chirps"], "sample_ids": ["uXxVebHsGZ8", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["male, clicks, speak", "a woman, chirps, animal"], "captions_pred_video": 
[null, null], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a woman is speaking and a dog is barking "], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["motors runs briefly and tires screech", "some men converse over an engine running"], "sample_ids": ["yRx9txMcBl0", "sCiy7QS1U"], "start_seconds": ["40", "300"], "properties": ["motors, tires, screech", "men, converse, engine"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows a person talking to someone?", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vSeGhaZt-aI", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["water, bubbles, speak", "a, scream, girl"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a motor vehicle roars, drowning out people speaking in the background"], "sample_ids": 
["ylpYOorfH4o", "s4Uz1Ffgo04"], "start_seconds": ["410", "100"], "properties": ["engine, run, loud", "roars, background, people speaking"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "a woman speaks as she rubs two objects together"], "sample_ids": ["t8CV69hcvF0", "vzxHnu-SFEw"], "start_seconds": ["210", "80"], "properties": ["person, sneeze, follow", "two objects, woman, speak"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman sneezes and speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "multiple people speak and children yell while water gurgles"], "sample_ids": ["w5W5Kqtc8E", "vb1fPSDI4c"], "start_seconds": ["100", "30"], "properties": ["wind, blow, vehicle", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a crowd of people are talking and laughing"], "question": "which entity has more people yelling", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "people applaud and hoot and chat quietly"], "sample_ids": ["s4Uz1Ffgo04", "wwyfGO2J4"], "start_seconds": ["100", "90"], "properties": ["water, rushes, motorcycle", "people, applaud, hoot"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "winds blows roughly as a vehicle races past"], "sample_ids": ["tOj4tdLRaA", "xjvTpk2Zpr8"], "start_seconds": ["70", "70"], "properties": ["woman, laugh, baby", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a jet engine roars and wind blows "], "question": "which entity 
is more likely to be in a vehicle", "label": 1}, {"captions": ["water flows as men speak and yell", "water splashes as an animal walks through"], "sample_ids": ["vJ7JPEFhyLA", "w1ir-sZ3Im8"], "start_seconds": ["16", "90"], "properties": ["water, flow, men", "animal, water, splashes"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "water splashes and gurgles as people speak"], "question": "which entity is more calm", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a duck quacks several times"], "sample_ids": ["uzQnlJXBbOM", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["ringing, beep, stop", "quacks, duck, several"], "captions_pred_video": ["footage of a person using a cell phone on a table", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a telephone rings and a man speaks", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man speaks as a car is passing by", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sK4u5T8hW78", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["a, car, pass", "men, talk, cars"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro 
gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "a woman speaks happily and an animal chirps"], "sample_ids": ["tOj4tdLRaA", "uWAAAL4CIoc"], "start_seconds": ["70", "0"], "properties": ["woman, laugh, baby", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman is speaking and a dog is barking "], "question": "which entity has a baby in it", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vzxHnu-SFEw", "xBxDz0CFVn0"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "stream, water, flow"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with 
scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman talking as an infant is crying", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tMbMDvT50j8", "tdWhHV3X25Q"], "start_seconds": ["12", "60"], "properties": ["a, talk, infant", "applause, audience, yells"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["water splashes as an animal walks through", "someone whistles a tune"], "sample_ids": ["w1ir-sZ3Im8", "sIXTftIuUgw"], "start_seconds": ["90", "90"], "properties": ["animal, water, splashes", "someone, tune, whistle"], "captions_pred_video": ["footage of a group of people riding horses through a river", null], "captions_pred_audio": ["water splashes and gurgles as people speak", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a woman speaks with water running", "a stream of water runs briefly"], "sample_ids": ["wTideSjRFS0", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["water, running, woman", "stream, water, run"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is 
speaking while water is running in the background", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["people speak then an engine runs", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["uMTTDZ2mb4", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["engine, run, people", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["an engine runs loudly", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["vqZuVbG6-HI", "vlS6YMeWAPo"], "start_seconds": ["130", "40"], "properties": ["loud, engine, run", "sheep, baa, birds"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a goat bleats and birds chirp"], "question": "which entity is quieter", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "water flows and trickles"], "sample_ids": ["rwtmaKiCcQU", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["nozzle, depressed, spray can", "water, flow, trickle"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["spraying and people speaking", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["water flows as men speak and yell", "people cheer as a vehicle engine revs"], "sample_ids": ["vJ7JPEFhyLA", "xjhAnI2q6hM"], "start_seconds": ["16", "6"], "properties": 
["water, flow, men", "engine revs, vehicle, people"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a truck is revving its engine and a man is speaking "], "question": "which entity is more likely to be in a vehicle?", "label": 1}, {"captions": ["a duck quacks continuously", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vh30P49Po6s", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["quacks, continuously, duck", "a woman, laughs, animal"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "a woman speaks and is crumpling paper"], "sample_ids": ["vlS6YMeWAPo", "xvDdE3zNf8Y"], "start_seconds": ["40", "120"], "properties": ["noise, bleat, call", "A, crumple, paper"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a goat bleats and birds chirp", "a woman speaks and crumples paper"], "question": "which entity is crumpling paper", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "water is sprayed across a hard surface"], "sample_ids": ["zl9Dqx-j7q4", "sQwlkXjQabo"], "start_seconds": ["6", "10"], "properties": ["engine, laugh, loud", "water, spray, surface"], "captions_pred_video": ["footage of a man driving a car in the dark", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a jet engine roars ", "spraying followed by silence"], "question": "which 
entity is a liquid", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sSMl2vc3ek", "vfYTJq7nU"], "start_seconds": ["20", "130"], "properties": ["loud, multiple, distance", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person snoring loudly", "a duck quacks and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "people cheer as a vehicle engine revs"], "sample_ids": ["s4Uz1Ffgo04", "xjhAnI2q6hM"], "start_seconds": ["100", "6"], "properties": ["water, rushes, motorcycle", "engine revs, vehicle, people"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a stream of water flows quickly", "a clock ticktocks"], "sample_ids": ["wbHTKEJZyhc", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["stream, water, flow", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video 
footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a clock is ticking loudly"], "question": "which entity is a clock", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a clock ticktocks"], "sample_ids": ["vimzuGQvdcU", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["a, man, yells", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a group of people are rafting down a river", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["s4Uz1Ffgo04", "wz7N8YRy74I"], "start_seconds": ["100", "30"], "properties": ["roars, background, people speaking", "rooster, crow, background, men"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", 
"label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "an engine runs loudly"], "sample_ids": ["sZPuqDgX2V0", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["engine, accelerate, intercom", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a lawn mower is running and men are speaking "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a horn honks and then loudly blares", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wnpJndXuxLc", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["horn, honk, loud", "music, gunfire, explosion"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["birds chirp, a woman speaks, and insects buzz", "a frog croaks as other frogs croak in the background"], "sample_ids": ["t97k0cejSQE", "yswmmRZFItk"], "start_seconds": ["250", "0"], "properties": ["sound, chirp, buzz", "background, frog, croak"], "captions_pred_video": ["a bee on a purple thistle flower", "a close up of a frog in the water"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a frog is croaking"], "question": "which entity has a background of frogs?", "label": 1}, {"captions": ["water splashes and a door squeaks", "vehicle engines race around a track as a man commentates"], "sample_ids": ["sdXV-ylviw", "sZPuqDgX2V0"], "start_seconds": ["190", "30"], "properties": ["sound, splash, door", "commentator, race, track"], "captions_pred_video": [null, null], "captions_pred_audio": 
["a dog barks and taps with background noise ", "a man is speaking and a helicopter is flying overhead "], "question": "which entity is a video of a race?", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "a diesel truck engine runs continuously"], "sample_ids": ["sQGXqGcwOTc", "sZvwOuuPGP0"], "start_seconds": ["3", "50"], "properties": ["cling, speak, dishes", "engine, diesel, truck"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "of a bulldozer clearing a road in a forest stock footage and royalty-free videos"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a medium engine is running "], "question": "which entity is a moving object", "label": 1}, {"captions": ["water flows followed by women screaming", "a child speaks"], "sample_ids": ["w5W5Kqtc8E", "yW6FWLSLkx4"], "start_seconds": ["100", "40"], "properties": ["water, flow, women", "a, child, speaks"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is silent", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a steam engine runs and whistles as it passes by"], "sample_ids": ["u7C-AEBQM", "se87d6yxEOA"], "start_seconds": ["30", "10"], "properties": ["ticks, rhythmic, quiet", "run, whistle, pass"], "captions_pred_video": [null, "footage of a train passing by a train station with smoke billowing out of the train's smokestack"], "captions_pred_audio": ["a ticktock of a clock", "a train is moving and blowing its whistle "], "question": "which entity is louder", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tK4VlLsNxak", "yajyRTUQk3U"], "start_seconds": ["120", 
"400"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "a woman, something, fried"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["some people speak", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vbZ-0lGPneg", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "a woman, something, fried"], "captions_pred_video": ["of a man holding a baby duck in his hands", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a person sniffles and sneezes", "a man talks as something metal hits against and glass is set down"], "sample_ids": ["uRlbY6aoBU", "x6ijhqRY38s"], "start_seconds": ["0", "250"], "properties": ["sneezes, sniffles, person", "something metal, glass, hit"], "captions_pred_video": [null, "a chef preparing a dish with a bottle of wine and a plate of food on a table"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking and dishes are clanging "], "question": "which entity is about hitting something metal against glass?", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "a toilet flushes and water drains"], "sample_ids": ["s3cTDAj31g", "sfAvvZwdLCY"], "start_seconds": ["80", "20"], "properties": ["man, talk, woman", "water drains, flushes, water"], "captions_pred_video": [null, "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking and a baby is 
crying", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["an insect buzzes around continuously", "a duck quacks continuously"], "sample_ids": ["v25l1jef3JY", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["buzzes, continuously, insect", "quacks, continuously, duck"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 1}, {"captions": ["a stream of water flows quickly", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wbHTKEJZyhc", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["stream, water, flow", "a woman, laughs, animal"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a 
river in autumn with trees and", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a woman is speaking and a baby is crying"], "question": "which entity is not a stream of water?", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "several insects fly while two men talk"], "sample_ids": ["xBxDz0CFVn0", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["stream, water, flow", "several, fly, men"], "captions_pred_video": ["footage is blurry and out of focus", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "a man woman speak while crickets sing"], "sample_ids": ["yZp6xizR0yU", "zTLVJCo4WEE"], "start_seconds": ["30", "30"], "properties": ["animal, bleat, cry", "a, crickets, sing"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "- a boy with a rifle aiming at a target"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a woman speaks and crickets chirp"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["here comes the train and it starts to blow the horn and get close", "wind blows as people chatter quietly"], "sample_ids": ["s7knHCFW82w", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["blow horn, get close, train", "wind, chatter, people"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "footage is blurry and out of focus"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a man is speaking with wind noise in the background "], "question": "which entity is 
quieter", "label": 1}, {"captions": ["a man talks while vehicles pass by", "pigeons vocalize and birds chirp"], "sample_ids": ["sK4u5T8hW78", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["a, man, talk", "vocalize, bird, chirp"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "wind blows as people chatter quietly"], "sample_ids": ["y4tPJXBKDig", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["a, noise, talk", "wind, chatter, people"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a woman speaks happily and an animal chirps"], "sample_ids": ["yLy-WycbVVE", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["background, people, talk", "a woman, chirps, animal"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", null], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a woman is speaking and a dog is barking "], 
"question": "which entity is more quiet", "label": 1}, {"captions": ["a person sniffles and then sneezes in the distance", "wind blows as people chatter quietly"], "sample_ids": ["uRlbY6aoBU", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["a, distance, sneeze", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "wind blows as people chatter quietly"], "sample_ids": ["yks4cLgIDMc", "xBxDz0CFVn0"], "start_seconds": ["170", "30"], "properties": ["background, speaking, child", "wind, chatter, people"], "captions_pred_video": ["footage of two kids wrestling on the floor", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a child is crying", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "pigeons vocalize and birds chirp"], "sample_ids": ["tGcFnX0GHI", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["ring, talk, woman", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xBxDz0CFVn0", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["stream, water, flow", "water, radio, man"], "captions_pred_video": ["footage is blurry and out of focus", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking with 
wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a motorcycle engine is revving while people are speaking"], "sample_ids": ["tDlfY3nmx1A", "y8dSeubCNI"], "start_seconds": ["160", "4"], "properties": ["applause, laugh, man", "engine revving, people speaking, motorcycle"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", null], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "an engine revving and people talking in the background"], "question": "which entity is a motorcycle?", "label": 1}, {"captions": ["an engine runs loudly", "wind blowing followed by a zoom"], "sample_ids": ["vqZuVbG6-HI", "vr8ZXjEBhMQ"], "start_seconds": ["130", "150"], "properties": ["loud, engine, run", "wind, blow, zoom"], "captions_pred_video": ["footage is blurry because it's raining outside", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["continuous snoring", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sLkeqCDJIyw", "xKB8O8LTs6s"], "start_seconds": ["120", "70"], "properties": ["loud, snoring, noise", "music, gunfire, explosion"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a person is snoring loudly", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is louder", "label": 1}, {"captions": ["a man talks as several small engines run", "water gurgles, metal squeaks and the water stops"], "sample_ids": ["u9A6VZQCZpU", "x4a9YGIw4ok"], "start_seconds": ["30", "120"], "properties": ["a, man, talk", "water, gurgles, stops"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a toilet flushes and water splashes"], "question": "which entity is about water?", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "a duck quacks continuously"], "sample_ids": ["uYT5gxnyMWM", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["female, spraying, scream", "quacks, continuously, duck"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "water pouring and bubbling"], "sample_ids": ["tgbONvsP47Y", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["noise, truck, accelerate", "water, bubbles, pouring"], "captions_pred_video": ["footage of a fire truck entering a garage", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do 
an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a car is driving on the road ", "water is running from a faucet"], "question": "which is a liquid", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a muffled toilet flushes and the water drains"], "sample_ids": ["yajyRTUQk3U", "sfAvvZwdLCY"], "start_seconds": ["400", "20"], "properties": ["noise, woman, speak", "flushes, drains, water"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a toilet is flushed"], "question": "which entity is silent", "label": 1}, {"captions": ["a motorcycle engine works nearby", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["tOSWIURC-4", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["engine, work, nearby", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a lawn mower is running ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "wind blowing followed by a zoom"], "sample_ids": ["tOSWIURC-4", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["engine, work, nearby", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a lawn mower is running ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a zoom?", "label": 0}, {"captions": ["water rushes and then a vehicle zooms past", "a small engine spits 
as it runs"], "sample_ids": ["s4Uz1Ffgo04", "sZvwOuuPGP0"], "start_seconds": ["100", "50"], "properties": ["water, rushes, vehicle", "spits, engine, runs"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "of a bulldozer clearing a road in a forest stock footage and royalty-free videos"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a medium engine is running "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["zALy31PjDl0", "zl9Dqx-j7q4"], "start_seconds": ["21", "6"], "properties": ["a man, a vehicle, a horn", "engine, laugh, loud"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yYEVLuqEytU", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["animal, pig, background", "a woman, laughs, animal"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking and an animal snorting?", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "some tunes played by whistling"], "sample_ids": ["zVacuqSb4LI", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["blares, fades, train", "tune, play, whistling"], "captions_pred_video": ["by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "a woman speaks as she rubs two objects together"], "sample_ids": ["tGcFnX0GHI", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["ring, talk, woman", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which woman is speaking", "label": 1}, {"captions": ["an audience gives applause as a man yells and a group sings", "continuous sizzling with a woman speaking towards the end"], "sample_ids": ["tdWhHV3X25Q", "ukxt9I7eMMg"], "start_seconds": ["60", "30"], "properties": ["applause, audience, yells", "continuous, woman, speaking"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "footage of a person preparing food on a stool in a kitchen"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a woman is speaking while food is frying in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["an airplane engine runs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yVPZ2MNWpms", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["engine, airplane, runs", "female, spraying, scream"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car is driving by on the road ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["paper folding and crinkling", "pigeons vocalize and birds chirp"], "sample_ids": ["zPpG3RD8lSs", "uiS58TNyUiw"], "start_seconds": ["20", "430"], "properties": ["paper, fold, crinkle", "vocalize, bird, chirp"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper 
youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "of the pigeon in the cage"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["loud intermittent buzzing with intermittent laughter", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sLUnaPT5gM8", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["loud, laughter, intermittent", "two men, woman, birds"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["continuous sneezing together with speech", "a infant makes noise and is excited"], "sample_ids": ["x4dZyf9Gbj0", "wIJK3-5y0kA"], "start_seconds": ["130", "30"], "properties": ["continuous, sneeze, speech", "noise, excited, infant"], "captions_pred_video": ["footage is blurry and out of focus", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a woman sneezes and speaks", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", 
"pigeons vocalize and birds chirp"], "sample_ids": ["rqu8iB22IY", "uiS58TNyUiw"], "start_seconds": ["5", "430"], "properties": ["sound, repeats, laugh", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a vehicle accelerates and squeals tires"], "sample_ids": ["yRx9txMcBl0", "yRx9txMcBl0"], "start_seconds": ["40", "40"], "properties": ["accelerates, tires, squeals", "accelerates, tires, squeals"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a car is revving its engine and skidding "], "question": "which vehicle accelerates and squeals 
tires", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "multiple people speak and children yell while water gurgles"], "sample_ids": ["yks4cLgIDMc", "vb1fPSDI4c"], "start_seconds": ["170", "30"], "properties": ["background, speaking, child", "multiple, people, yell"], "captions_pred_video": ["footage of two kids wrestling on the floor", null], "captions_pred_audio": ["a man is speaking and a child is crying", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["an infant crying as a woman laughs", "a man speaks followed by another man speaking outside"], "sample_ids": ["xhmRY9yhC7c", "viuTg1M-dqg"], "start_seconds": ["20", "30"], "properties": ["a, laugh, infant", "two men, speak, follow"], "captions_pred_video": ["of a baby crying in a baby bouncer", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "a infant makes noise and is excited"], "sample_ids": ["rwTERCUno", "wIJK3-5y0kA"], "start_seconds": ["90", "30"], "properties": ["engine, idle, sputter", "noise, excited, infant"], "captions_pred_video": [null, "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["an engine is idling and vibrating", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "people speak as gunfire rings out"], "sample_ids": ["yRx9txMcBl0", "wqTCwqVRDlk"], "start_seconds": ["40", "80"], "properties": ["accelerates, tires, squeals", "gunfire, ring, speak"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 
5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "people speak as gunfire rings out"], "sample_ids": ["xM4joTqDVp4", "wqTCwqVRDlk"], "start_seconds": ["160", "80"], "properties": ["background, chirp, birds", "gunfire, ring, speak"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sOa7g-44Dag", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["background, man, spray", "engine, laugh, loud"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "a person snores loudly multiple 
times at a close distance"], "sample_ids": ["uOpoD0gGXcs", "sSMl2vc3ek"], "start_seconds": ["120", "20"], "properties": ["chirps, woman, bird", "loud, multiple, distance"], "captions_pred_video": ["a herd of cows grazing in the field", null], "captions_pred_audio": ["birds are chirping and a man is speaking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "people speak as gunfire rings out"], "sample_ids": ["yNtRmrn0io8", "wqTCwqVRDlk"], "start_seconds": ["210", "80"], "properties": ["storm, distance, strike", "gunfire, ring, speak"], "captions_pred_video": ["footage of a house in the middle of the night", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["rain falls and thunder roars", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "some tunes played by whistling"], "sample_ids": ["tQWGZLItBXk", "u6BnG6YZqJ4"], "start_seconds": ["170", "0"], "properties": ["music, person, ding", "tune, play, whistling"], "captions_pred_video": ["worms revolution screenshots", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "small dogs yip and bark sharply"], "sample_ids": ["shmR4OZtzqA", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["man, engine, idle", "bark, yip, sharply"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche 
youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man speaks while a motor runs", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["water quietly rushes by while birds chirp in the background", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sYITalLZjj4", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["water, rushes, background, birds", "a, scream, girl"], "captions_pred_video": ["two ducks are swimming in the water near each other", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["wind blows and birds chirp", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "people speak as gunfire rings out"], "sample_ids": ["yZp6xizR0yU", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["animal, bleat, cry", "gunfire, ring, speak"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["u21-Z5gJCB8", 
"y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["background, voice, man", "motor noise, horn, siren"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a machine clanks and thumps and a male speaks", "a toilet flushes and a female speaks"], "sample_ids": ["sWZzXuWYY", "yaln9y8I7ms"], "start_seconds": ["420", "230"], "properties": ["male, clanks, thumps", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a toilet flushes and a man speaks"], "question": "which entity is a machine?", "label": 0}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "water pouring and bubbling"], "sample_ids": ["yZmhM1HcsyE", "uyRfq-jKPpo"], "start_seconds": ["4", "50"], "properties": ["engine, roar, water", "water, bubbles, pouring"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how 
to do an afro pu"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "a small engine idles continuously"], "sample_ids": ["x6ijhqRY38s", "y5WII6cTH7k"], "start_seconds": ["250", "40"], "properties": ["bowl, silverware, man", "engine, idle, continuously"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage of a sewing machine stitching a red and white hat"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "an engine is knocking and vibrating "], "question": "which entity is not moving", "label": 1}, {"captions": ["a car accelerates and wind blows", "small dogs yip and bark sharply"], "sample_ids": ["u0TrcHhkPQ", "v-wcQf4BDY0"], "start_seconds": ["20", "120"], "properties": ["accelerates, wind, blows", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["xKB8O8LTs6s", "wz7N8YRy74I"], "start_seconds": ["70", "30"], "properties": ["music, gunfire, explosion", "rooster, crow, background, men"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a diesel truck engine runs while 
wind blows", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["xyL9F5VrjkE", "su6FAOcOA8c"], "start_seconds": ["20", "4"], "properties": ["engine, run, wind", "engine, idle, woman"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wz7N8YRy74I", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["rooster, crow, background, people", "rustling, ducks, quack"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", null], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a duck quacks and a woman speaks"], "question": "which entity has more animals", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "an infant crying frantically"], "sample_ids": ["vZAw4apG0Es", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["people, clock, converse", "cry, infant, frantically"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "of the baby crying in the car seat"], "captions_pred_audio": ["a clock is ticking and people are talking", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["w0xsN8X18Y", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["music, surface, rain", "a woman, laughs, animal"], 
"captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking and laughing?", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "multiple people speak and children yell while water gurgles"], "sample_ids": ["uzQnlJXBbOM", "vb1fPSDI4c"], "start_seconds": ["50", "30"], "properties": ["ringing, beep, stop", "multiple, people, yell"], "captions_pred_video": ["footage of a person using a cell phone on a table", null], "captions_pred_audio": ["a telephone rings and a man speaks", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "vehicle engines race around a track as a man commentates"], "sample_ids": ["wAAkbZToh8", "sZPuqDgX2V0"], "start_seconds": ["0", "30"], "properties": ["burp, laugh, speak", "commentator, race, track"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man burps and a woman speaks", "a man is speaking and a helicopter is flying overhead "], "question": "which entity is a video of a person speaking and laughing?", "label": 0}, {"captions": ["a low rumbling in the distance followed by a motorcycle engine revving up", "a stream of water runs briefly"], "sample_ids": ["vr8ZXjEBhMQ", "x-PeY8Yb8M4"], "start_seconds": ["150", "300"], "properties": ["sound, distance, engine", "stream, water, run"], "captions_pred_video": ["is taken from a motorcycle's point of view", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes 
a woman to gasp and the music plays", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sU53zg9Jp7s", "vfYTJq7nU"], "start_seconds": ["380", "130"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "rustling, ducks, quack"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", null], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a duck quacks and a woman speaks"], "question": "which entity is about a bird?", "label": 0}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "winds blows roughly as a vehicle races past"], "sample_ids": ["ukg5L09Wpvo", "xjvTpk2Zpr8"], "start_seconds": ["150", "70"], "properties": ["clickety-clack, train, whistle", "wind, blows, vehicle"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a jet engine roars and wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["zj2R0XoFr5k", "ukg5L09Wpvo"], "start_seconds": ["50", "150"], "properties": ["airplane, fly, overhead", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a train blows its whistle and blows its horn "], "question": "which is a train", "label": 1}, {"captions": ["an infant crying frantically", "a stream of water flows as people talk and wind 
blows"], "sample_ids": ["zwOBqeFTgiU", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["cry, infant, frantically", "stream, water, flow"], "captions_pred_video": ["of the baby crying in the car seat", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby cries loudly", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["dogs barking and whimpering", "crowd applause while a guy laughs followed by another man speaking"], "sample_ids": ["tIY7qOV3rEM", "tDlfY3nmx1A"], "start_seconds": ["0", "160"], "properties": ["barking, whimpering, dog", "applause, laugh, man"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "a man in a suit and tie is talking to another man in a suit and tie"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a crowd is clapping and laughing and a man is speaking "], "question": "which entity is a group of people", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["s4Uz1Ffgo04", "zj2R0XoFr5k"], "start_seconds": ["100", "50"], "properties": ["roars, background, people speaking", "airplane, boy, fly"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vlS6YMeWAPo", "zl9Dqx-j7q4"], "start_seconds": ["40", "6"], "properties": ["noise, bleat, call", "engine, laugh, loud"], "captions_pred_video": 
["footage of a goat in a pen behind a wooden fence", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a goat bleats and birds chirp", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vJvryTwuAV8", "yajyRTUQk3U"], "start_seconds": ["16", "400"], "properties": ["audience, cheer, man", "a woman, something, fried"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a man speaking with light rustling", "water pouring and bubbling"], "sample_ids": ["zOZleIRqZm4", "uyRfq-jKPpo"], "start_seconds": ["80", "50"], "properties": ["light, rustling, man", "water, bubbles, pouring"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking with crickets chirping in the 
background", "water is running from a faucet"], "question": "which entity is more likely to be in a kitchen", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "wind blowing followed by a zoom"], "sample_ids": ["uEU-Hg5MTN8", "vr8ZXjEBhMQ"], "start_seconds": ["27", "150"], "properties": ["animal, grunts, snorts", "wind, blow, zoom"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "an airplane engine spools and people speak"], "sample_ids": ["v0x1odnXtP0", "wTjoRj1se3U"], "start_seconds": ["210", "390"], "properties": ["keyboard, type, computer", "airplane, engine, spool"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a person is typing on a keyboard", "a jet engine is running and people are talking"], "question": "which is not a type of machine", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "speaking following by laughing and clapping"], "sample_ids": ["x6ijhqRY38s", "u2f5NpsoHBg"], "start_seconds": ["250", "30"], "properties": ["bowl, silverware, man", "person, laugh, clap"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "is being projected on a screen at the front of the stage"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a woman is speaking and a crowd is clapping"], "question": "which person is speaking?", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "a car speeding up in the distance"], "sample_ids": ["wIvYjuR3nrg", "u0TrcHhkPQ"], "start_seconds": ["9", 
"20"], "properties": ["birds, pigeons, vocalize", "distance, car, speed"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", null], "captions_pred_audio": ["birds are chirping and cooing", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["vbr9mHKc8WM", "wSVhSdj0F0"], "start_seconds": ["40", "10"], "properties": ["noise, loudness, engine", "horn honks, keys jingle, electronic beep"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine is idling", "a car horn honks and keys jangle with background noise "], "question": "which entity is quieter", "label": 0}, {"captions": ["an airplane accelerates briefly", "birds chirp and objects are moved around"], "sample_ids": ["zjTG0gaGCUI", "yPUYU6t3rwo"], "start_seconds": ["80", "370"], "properties": ["accelerates, airplane, briefly", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a jet engine roars as wind blows ", "insects buzz and a man speaks"], "question": "which entity is moving around objects", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "pigeons vocalize and birds chirp"], "sample_ids": ["w8uLijTqtlU", "uiS58TNyUiw"], "start_seconds": ["70", "430"], "properties": ["wind, microphone, noise", "vocalize, bird, chirp"], "captions_pred_video": ["footage is blurry and shaky", "of the pigeon in the cage"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "a clock ticktocks"], "sample_ids": ["wztCSUxOf8", "v-g-j2uTByM"], 
"start_seconds": ["130", "30"], "properties": ["a crowd, yells, applauds", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "people cheer as a vehicle engine revs"], "sample_ids": ["sHbXC6na9hg", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["a person, saw, wood", "engine revs, vehicle, people"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["an engine is idling and vibrating", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sncRqQ67iJU", "xKB8O8LTs6s"], "start_seconds": ["460", "70"], "properties": ["loud, repeatedly, man", "music, gunfire, explosion"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a person is snoring", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "children speak and play together"], "sample_ids": ["xyL9F5VrjkE", "yVVP8XvWJTo"], "start_seconds": ["20", "260"], "properties": ["engine, run, wind", "children, speak, play"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a playground at a school or daycare center"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "children are speaking and breathing with 
background noise "], "question": "which entity is more likely to be in motion", "label": 0}, {"captions": ["water splashes and a motorboat passes as people yell", "a stream of water flows as people talk and wind blows"], "sample_ids": ["w5W5Kqtc8E", "xBxDz0CFVn0"], "start_seconds": ["100", "30"], "properties": ["water, splashes, motorboat", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["u6jIvCtKarQ", "sLUnaPT5gM8"], "start_seconds": ["70", "0"], "properties": ["a, man, speaks", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a person using a blender on a stove top", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "paper is crumpling consistently"], "sample_ids": ["wSVhSdj0F0", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["beep, clang, footsteps", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a toilet door squeaks as it is opened", "some men converse over an engine running"], "sample_ids": ["sdXV-ylviw", "sCiy7QS1U"], "start_seconds": ["190", "300"], 
"properties": ["door, toilet, squeaks", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dog barks and taps with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation?", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "a woman speaks happily and an animal chirps"], "sample_ids": ["sOa7g-44Dag", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["audio, scratching, man", "a woman, chirps, animal"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", null], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a woman is speaking and a dog is barking "], "question": "which entity is more likely to be a recording", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a cat meows as a young woman speaks"], "sample_ids": ["rqfQRErjfk8", "x5cuQjOdM3E"], "start_seconds": ["170", "30"], "properties": ["crowd, cheers, applauds", "cat, meows, young woman"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a cat meows and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["zuua6-5goWw", "rqu8iB22IY"], "start_seconds": ["30", "5"], "properties": ["birds, chirp, quiet, man, speaks", "sound, repeats, laugh"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", null], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a dog barks and a man speaks while music plays "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a car accelerates and wind blows"], "sample_ids": ["tDVADusiIoc", "u0TrcHhkPQ"], "start_seconds": ["60", "20"], "properties": ["wind, radio, waves", "accelerates, wind, blows"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "paper folding and crinkling"], "sample_ids": ["u5RmF3c3Aw", "zPpG3RD8lSs"], "start_seconds": ["60", "20"], "properties": ["engine, car, zoom", "paper, fold, crinkle"], "captions_pred_video": [null, "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube 
how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "the wind blows and a mouse clicks "], "question": "which is not a vehicle", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "some men converse over an engine running"], "sample_ids": ["soTOh3zYJfY", "sCiy7QS1U"], "start_seconds": ["40", "300"], "properties": ["vehicle, skid, tires", "men, converse, engine"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["t25U-v4k4ts", "zl9Dqx-j7q4"], "start_seconds": ["40", "6"], "properties": ["a, chirps, bird", "engine, laugh, loud"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["paper is crumpling consistently", "wind blowing followed by a zoom"], "sample_ids": ["v5cSxLaHADY", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "wind, blow, zoom"], "captions_pred_video": ["footage of the person holding a pair of scissors", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["paper is crumpled and crinkled", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", 
"a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vmrxwuAMb2I", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["a dog, inhales, exhales", "animal, grunts, snorts"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a dog barks and growls", "a woman is speaking and a baby is crying"], "question": "which animal is grunting and snorting", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["ukxt9I7eMMg", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["food, pan, cook", "a woman, something, fried"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a woman is speaking while food is frying in the background"], "question": "what is being cooked in the pan?", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zVacuqSb4LI", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["blares, fades, train", "applause, audience, yells"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by 
mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["tDlysoZiA1I", "ziUT9IFTkjg"], "start_seconds": ["0", "10"], "properties": ["animal, grunt, multiple", "background, birds, rustling"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", null], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "birds are chirping and a chime is ringing "], "question": "which entity has a background of birds chirping", "label": 1}, {"captions": ["a rumble grows louder", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["y4MY9mp8-TA", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["loudness, increase, rumble", "wind, blow, vehicle"], "captions_pred_video": ["a helicopter flying in the sky", null], "captions_pred_audio": ["a helicopter flies overhead ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["continuous sneezing together with speech", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["x4dZyf9Gbj0", "xKB8O8LTs6s"], "start_seconds": ["130", "70"], "properties": ["continuous, sneeze, speech", "music, gunfire, explosion"], "captions_pred_video": ["footage is blurry and out of focus", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman sneezes and speaks", "music plays while a woman 
speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a man speaks as a motor runs in the background"], "sample_ids": ["w34HjHr6gAY", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["beeps, hit, woman", "background, motor, run"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vddP56-ogds", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["liquid, laughs, man", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a jet engine spools up and takes off", "a infant makes noise and is excited"], "sample_ids": ["vBslzh7saPw", "wIJK3-5y0kA"], "start_seconds": ["90", "30"], "properties": ["engine, spools, takes", "noise, excited, infant"], 
"captions_pred_video": ["a pickup truck carrying a large object down the road", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a baby cries and a woman speaks"], "question": "which entity is quieter", "label": 1}, {"captions": ["children speak and play together", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yVVP8XvWJTo", "uYT5gxnyMWM"], "start_seconds": ["260", "50"], "properties": ["children, speak, play", "female, spraying, scream"], "captions_pred_video": ["footage of a playground at a school or daycare center", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is more violent", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["uZesmtKZGSw", "tiDFTC-5vU"], "start_seconds": ["250", "30"], "properties": ["car, track, man", "male, duck, laugh"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", null], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking and ducks are quacking"], "question": "which entity has a man speaking to a duck?", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "a child yells and 
another yells"], "sample_ids": ["vs65y4qmyBE", "vMDHu7Lxcgw"], "start_seconds": ["340", "410"], "properties": ["engine, run, man", "two, yell, child"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a boy playing on a trampoline in the backyard"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking and a child is shouting"], "question": "which entity is more active", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["uiS58TNyUiw", "uYT5gxnyMWM"], "start_seconds": ["430", "50"], "properties": ["vocalize, bird, chirp", "female, spraying, scream"], "captions_pred_video": ["of the pigeon in the cage", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "water flows and trickles"], "sample_ids": ["zkKdxzNC97Y", "tB7hWb9gTuQ"], "start_seconds": ["27", "30"], "properties": ["hard, surface, door", "water, flow, trickle"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a door is opened and closed", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a child speaks in closed space"], "sample_ids": ["w5W5Kqtc8E", "yW6FWLSLkx4"], "start_seconds": ["100", "40"], "properties": ["wind, engine, scream", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", 
"a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a car accelerates and wind blows"], "sample_ids": ["wyllXV6PjKo", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["a baby, a woman, a man", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman speaks and a baby cries", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "a man speaks as a motor runs in the background"], "sample_ids": ["xjhAnI2q6hM", "xZepNM9qcRA"], "start_seconds": ["6", "30"], "properties": ["wind, blow, loudly", "background, motor, run"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "multiple beeps are followed by a squawk and a child speaking"], "sample_ids": ["vbpKkWvfOu4", "w34HjHr6gAY"], "start_seconds": ["560", "30"], "properties": ["a, woman, man", "beeps, squawk, child speaking"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard 
of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a beep sounds followed by a child speaking"], "question": "which entity has a child speaking?", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "rain falls on a surface as men speak and music plays"], "sample_ids": ["wRV8yMk886E", "w0xsN8X18Y"], "start_seconds": ["0", "30"], "properties": ["liquid, spray, nozzle", "music, surface, rain"], "captions_pred_video": ["two cars are parked in a parking lot at night", null], "captions_pred_audio": ["a man speaks followed by a loud burst", "a man is speaking while a motorboat is moving in the background "], "question": "which entity has a nozzle spraying liquid?", "label": 0}, {"captions": ["bees buzz and wind blows", "a car speeding up in the distance"], "sample_ids": ["tMJne1a4AFI", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["bees buzz, wind blows, bees", "distance, car, speed"], "captions_pred_video": ["a swarm of bees on the ground", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "tapping occurs then a baby cries"], "sample_ids": ["xfudFO976zE", "wIJK3-5y0kA"], "start_seconds": ["0", "30"], "properties": ["animal, bleats, cry", "a, cry, baby"], "captions_pred_video": ["footage is blurry and shaky", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a baby cries and a woman speaks"], "question": "which entity is crying", "label": 1}, {"captions": 
["a man speaks while a machine runs before a smoke alarm beeps", "a man speaks over intermittent keyboard taps"], "sample_ids": ["sG7TyPnFDR0", "tw76HGONaKg"], "start_seconds": ["180", "570"], "properties": ["beeps, machine, smoke alarm", "audio, man, keyboard"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["electronic beeps occur in a short series", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["y682ml90jGw", "su6FAOcOA8c"], "start_seconds": ["11", "4"], "properties": ["beeps, series, electronic", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a beeping sound is being made ", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "a train horn blows as it 
passes by"], "sample_ids": ["vms5XGTDVQc", "zVacuqSb4LI"], "start_seconds": ["220", "30"], "properties": ["paper, crumpled, crinkled", "horn, blows, train"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["paper is crumpled and crinkled", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "water runs from a faucet while some men speak and the water runs in the sink"], "sample_ids": ["yLy-WycbVVE", "vzceMbklWc"], "start_seconds": ["30", "180"], "properties": ["background, people, talk", "water, faucet, sink"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", null], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "water is running and a man is speaking"], "question": "which entity has a sink?", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "people speak as gunfire rings out"], "sample_ids": ["vqZuVbG6-HI", "wqTCwqVRDlk"], "start_seconds": ["130", "80"], 
"properties": ["background, male, female", "gunfire, ring, speak"], "captions_pred_video": ["footage is blurry because it's raining outside", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man is speaking and a gun is fired"], "question": "which entity has more gunfire", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a series of light horn beeps is followed by a loud steam whistle"], "sample_ids": ["vXlk0lIQBFo", "wnpJndXuxLc"], "start_seconds": ["470", "50"], "properties": ["wind, talk, vocalize", "beeps, loud, whistle"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a baby cries and a woman moans", "a woman talking as an infant is crying"], "sample_ids": ["smDKStoHBJo", "tMbMDvT50j8"], "start_seconds": ["0", "12"], "properties": ["a, cry, woman", "a, talk, infant"], "captions_pred_video": ["a man holding a crying baby in his arms", "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a baby cries and a woman speaks"], "question": "which entity has a woman talking to an infant?", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["spYNpeN7rPY", "uEU-Hg5MTN8"], "start_seconds": ["1", "27"], "properties": ["a clock, ticktock, man", "a woman, laughs, animal"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity has a clock in it?", "label": 0}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sEprKHm8Sj8", "zj2R0XoFr5k"], "start_seconds": ["90", "50"], "properties": ["car, tires, slows", "airplane, boy, fly"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman speaks while a helicopter flies overhead "], "question": 
"which entity is flying", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "small dogs yip and bark sharply"], "sample_ids": ["zofjfKhqLk8", "v-wcQf4BDY0"], "start_seconds": ["10", "120"], "properties": ["noise, stop, motor", "bark, yip, sharply"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["an insect buzzes around continuously", "a car accelerates and wind blows"], "sample_ids": ["v25l1jef3JY", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["buzzes, continuously, insect", "accelerates, wind, blows"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["xyx6eNVEYRY", "xBxDz0CFVn0"], "start_seconds": ["380", "30"], "properties": ["loud, engine, muffles", "stream, water, flow"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "footage is blurry and out of focus"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a man is speaking with wind noise in the background "], "question": "which entity is flowing", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "an insect buzzes around continuously"], "sample_ids": ["vhJWZheqaE", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["water drains unevenly, toilet flushes, water drains", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background 
with a cartoon character in the foreground"], "captions_pred_audio": ["a toilet is flushed", "a fly is buzzing around a microphone "], "question": "which entity is a source of noise", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a car accelerates and wind blows"], "sample_ids": ["vcmWSmvti8", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["music, man, fire", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "a duck quacks continuously"], "sample_ids": ["yZp6xizR0yU", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["animal, bleat, cry", "quacks, continuously, duck"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a duck is quacking loudly"], "question": "which animal is speaking", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a woman speaks happily and an animal chirps"], "sample_ids": ["sK4u5T8hW78", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["a, car, pass", "a woman, chirps, animal"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and 
a dog is barking "], "question": "which entity is more active", "label": 1}, {"captions": ["some men converse over an engine running", "a vehicle is skidding and squealing tires"], "sample_ids": ["sCiy7QS1U", "soTOh3zYJfY"], "start_seconds": ["300", "40"], "properties": ["men, converse, engine", "vehicle, skid, tires"], "captions_pred_video": [null, "a red car drifting on a winding road with smoke coming out of it"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["small dogs yip and bark sharply", "a man speaks followed by another man speaking outside"], "sample_ids": ["v-wcQf4BDY0", "viuTg1M-dqg"], "start_seconds": ["120", "30"], "properties": ["bark, yip, sharply", "two men, speak, follow"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a dog barks and growls", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["long loud burping by a man", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["xmiUIOhtZyQ", "wDVMhEdTiVw"], "start_seconds": ["60", "30"], "properties": ["loud, burp, man", "gun, shoot, water"], "captions_pred_video": ["homer simpson drinking a beer", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a person burps and music plays in the background ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not followed by water sloshing nearby?", "label": 0}, {"captions": ["a person uses a saw to cut some wood", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sHbXC6na9hg", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["a person, saw, wood", "a woman, something, 
fried"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "- a woman cooking in the kitchen"], "captions_pred_audio": ["an engine is idling and vibrating", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vzxHnu-SFEw", "uEU-Hg5MTN8"], "start_seconds": ["80", "27"], "properties": ["two objects, woman, speak", "animal, grunts, snorts"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["people speak and tapping occurs", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["tFCUUGdREgA", "wDVMhEdTiVw"], "start_seconds": ["70", "30"], "properties": ["people, tap, speak", "gun, shoot, water"], 
"captions_pred_video": ["a person riding a white horse in an indoor arena", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause water to slosh", "label": 1}, {"captions": ["a child speaks in closed space", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yW6FWLSLkx4", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["child, space, speak", "female, spraying, scream"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a infant makes noise and is excited"], "sample_ids": ["zgUgkpk78xU", "wIJK3-5y0kA"], "start_seconds": ["70", "30"], "properties": ["clinking, humming, horn", "noise, excited, infant"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "some tunes played by whistling"], "sample_ids": ["w34HjHr6gAY", "u6BnG6YZqJ4"], "start_seconds": ["30", 
"0"], "properties": ["beeps, hit, woman", "tune, play, whistling"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a person whistling a song"], "question": "which entity is played by whistling", "label": 1}, {"captions": ["an airplane engine runs", "some men converse over an engine running"], "sample_ids": ["yVPZ2MNWpms", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["engine, airplane, runs", "men, converse, engine"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", null], "captions_pred_audio": ["a car is driving by on the road ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["food is frying while a woman speaks", "a beep occurs briefly"], "sample_ids": ["yhQ2Lg-7qDY", "xtWeJ56-U-g"], "start_seconds": ["130", "20"], "properties": ["food, woman, speak", "beep, occur, briefly"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8"], "captions_pred_audio": ["a faucet is running and a man is speaking", "mechanisms are 
ticking and a beep is heard "], "question": "which entity is silent", "label": 1}, {"captions": ["a toilet flushes and water drains", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sfAvvZwdLCY", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["water drains, flushes, water", "three men, wind, flow"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing", "label": 1}, {"captions": ["a man is filing a hard object", "water is sprayed across a hard surface"], "sample_ids": ["vveS8HT7Uog", "sQwlkXjQabo"], "start_seconds": ["100", "10"], "properties": ["a man, hard, object", "water, spray, surface"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "spraying followed by silence"], "question": "which object is harder to file", "label": 0}, {"captions": ["water flows as men speak and yell", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vJ7JPEFhyLA", "tdWhHV3X25Q"], "start_seconds": ["16", "60"], "properties": ["water, flow, men", "applause, audience, yells"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "vehicles pass by on a roadway"], "sample_ids": ["vveS8HT7Uog", "tgbONvsP47Y"], "start_seconds": 
["100", "0"], "properties": ["a man, objects, speak", "pass, vehicle, roadway"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "a duck quacks continuously"], "sample_ids": ["xSKJGCItUWE", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["engine, work, child", "quacks, continuously, duck"], "captions_pred_video": ["footage of the helicopter flying in the room", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a vehicle engine runs as a siren and horn sound", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["u--KhUW8l1Y", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["sound, vehicle, horn", "airplane, boy, fly"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a clock ticktocks continuously", "someone is typing on a computer keyboard"], "sample_ids": ["vlJS7LN2XyM", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["ticktocks, clock, ticktocks continuously", "keyboard, type, computer"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are 
hard to see due to the blurriness of the video", "how to make money on youtube in spanish"], "captions_pred_audio": ["a ticktock of a clock", "a person is typing on a keyboard"], "question": "which object is moving", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a man laughs and speaks as cats purr and hiss"], "sample_ids": ["xKB8O8LTs6s", "vVhthZ45k3Y"], "start_seconds": ["70", "30"], "properties": ["music, gunshots, explosion", "cat, purr, hiss"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage is blurry and out of focus"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking and a cat is meowing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a man speaks as a machine runs", "vehicles pass by on a roadway"], "sample_ids": ["vD6lYD1l0BY", "tgbONvsP47Y"], "start_seconds": ["330", "0"], "properties": ["a, machine, run", "pass, vehicle, roadway"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a toilet flushes and water drains", "plastic is tapped on while someone speaks"], "sample_ids": ["sfAvvZwdLCY", "wvKpEYswXO0"], "start_seconds": ["20", "150"], "properties": ["water drains, flushes, water", "plastic, tap, speak"], "captions_pred_video": ["footage of the toilet in the bathroom", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a baby laugh at a sputter"], 
"sample_ids": ["ziUT9IFTkjg", "sLUnaPT5gM8"], "start_seconds": ["10", "0"], "properties": ["background, birds, rustling", "laugh, sputter, baby"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more likely to be in a forest?", "label": 0}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "a duck quacks loudly and continuously"], "sample_ids": ["sjlVMgdGSK0", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["car, revving, loudly", "loud, continuous, quacks"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "winds blows roughly as a vehicle races past"], "sample_ids": ["sAam2NqGhLY", "xjvTpk2Zpr8"], "start_seconds": ["20", "70"], "properties": ["snoring, breathing, child", "wind, blows, vehicle"], "captions_pred_video": ["of a little girl sleeping on a couch", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a person is snoring", "a jet engine roars and wind blows "], "question": "which entity is not a person", "label": 1}, {"captions": ["a child yells and another yells", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vMDHu7Lxcgw", "xBxDz0CFVn0"], "start_seconds": ["410", "30"], "properties": ["two, yell, child", "stream, water, flow"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a man is speaking 
with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "soft movement is accompanied by clocks ticking in the background"], "sample_ids": ["uiItxDsDMFI", "vlJS7LN2XyM"], "start_seconds": ["30", "30"], "properties": ["wood, piece, saw", "background, clocks, ticking"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["a saw is being used with background noise ", "a ticktock of a clock"], "question": "which entity is a video of a person sawing wood?", "label": 0}, {"captions": ["a woman talks while a baby cries and a man whispers", "a stream of water flows as people talk and wind blows"], "sample_ids": ["smDKStoHBJo", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["a, talk, baby, cry", "stream, water, flow"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["a male speaks and another male speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["viuTg1M-dqg", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["two males, speaking, male", "water, radio, man"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a person screams 
glaringly", "water is sprayed across a hard surface"], "sample_ids": ["xC8kbrKJmco", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["glaringly, screams, person", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a goat is bleating ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a weapon fires multiple times", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sMC07Ucy7kg", "vYkA3cfXp5Q"], "start_seconds": ["10", "30"], "properties": ["weapon, fire, multiple", "engine, accelerate, idle"], "captions_pred_video": ["footage is from a car's point of view", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "an engine is idling"], "question": "which entity is not a weapon", "label": 1}, {"captions": ["a goat screams and people speak in the background", "three men talk while wind blows and some liquid flows"], "sample_ids": ["xC8kbrKJmco", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["background, goat, scream", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a goat is bleating ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a machine beeps continuously", "paper is crumpling consistently"], "sample_ids": ["y682ml90jGw", "v5cSxLaHADY"], "start_seconds": ["11", "0"], "properties": ["beeps, machine, continuously", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a beeping sound is being made ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": 
["dogs bark as an engine runs and a person whistles", "a child speaks in closed space"], "sample_ids": ["zY3icUyMdh8", "yW6FWLSLkx4"], "start_seconds": ["20", "40"], "properties": ["dog, bark, engine", "child, space, speak"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a male speaks and another male speaks", "an infant crying frantically"], "sample_ids": ["viuTg1M-dqg", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["two males, speaking, male", "cry, infant, frantically"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "paper folding and crinkling"], "sample_ids": ["tK4VlLsNxak", "zPpG3RD8lSs"], "start_seconds": ["120", "20"], "properties": ["a, dial, telephone", "paper, fold, crinkle"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a 
valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "the wind blows and a mouse clicks "], "question": "which is not a rotary telephone", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sG7TyPnFDR0", "wDVMhEdTiVw"], "start_seconds": ["180", "30"], "properties": ["beeps, machine, smoke alarm", "gun, shoot, water"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xNMovAf3o50", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["rain, thunder, music", "airplane, boy, fly"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["an infant crying as a woman laughs", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["xhmRY9yhC7c", "vfYTJq7nU"], "start_seconds": 
["20", "130"], "properties": ["a, laugh, infant", "rustling, ducks, quack"], "captions_pred_video": ["of a baby crying in a baby bouncer", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a duck quacks and a woman speaks"], "question": "which entity is about a woman and an infant?", "label": 0}, {"captions": ["wind blowing followed by a zoom", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vr8ZXjEBhMQ", "wqZ135Ssz0"], "start_seconds": ["150", "60"], "properties": ["wind, blow, zoom", "two men, woman, birds"], "captions_pred_video": ["is taken from a motorcycle's point of view", null], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["rustling with distant murmuring", "small dogs yip and bark sharply"], "sample_ids": ["wnNNcxAPwGQ", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["sound, distance, rustling", "bark, yip, sharply"], "captions_pred_video": ["footage of a yellow truck doing a burnout on a race track", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a crowd of people are talking and laughing while a skateboard rolls by ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vJvryTwuAV8", "zl9Dqx-j7q4"], "start_seconds": ["16", "6"], "properties": ["audience, cheer, man", "engine, laugh, loud"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a jet engine roars "], "question": "which entity is 
louder", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a toilet flushes and a female speaks"], "sample_ids": ["tOSWIURC-4", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["engine, work, nearby", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a lawn mower is running ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "a person is snoring while sleeping"], "sample_ids": ["sjlVMgdGSK0", "vJrjSeP17yE"], "start_seconds": ["30", "40"], "properties": ["car, revving, loudly", "a person is sleeping, snoring, person"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a person snoring loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "a clock ticktocks"], "sample_ids": ["zF8yoL0rkbI", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["engine, run, someone", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the traffic on the street at night", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "paper is crumpling consistently"], "sample_ids": ["wvKpEYswXO0", "v5cSxLaHADY"], "start_seconds": ["150", "0"], "properties": ["sound, water, running", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of the person holding a pair of 
scissors"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["some people speak", "winds blows roughly as a vehicle races past"], "sample_ids": ["vbZ-0lGPneg", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "wind, blows, vehicle"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["two frogs croak at each other", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zg0X6BnhOLQ", "vb1fPSDI4c"], "start_seconds": ["410", "30"], "properties": ["two frogs, croak, at each other", "multiple, people, yell"], "captions_pred_video": ["footage of lightning in the sky at night", null], "captions_pred_audio": ["a frog is croaking", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["wind blows and women speak as livestock vocalizes", "water is sprayed across a hard surface"], "sample_ids": ["vXlk0lIQBFo", "sQwlkXjQabo"], "start_seconds": ["470", "10"], "properties": ["wind, speak, vocalize", "water, spray, surface"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["y1saVTXsKwc", "uEU-Hg5MTN8"], "start_seconds": 
["80", "27"], "properties": ["a, dog, talk", "a woman, laughs, animal"], "captions_pred_video": ["a dog playing with a pink ball", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a dog barks and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity has a woman talking to an animal?", "label": 0}, {"captions": ["a jet engine spools up and takes off", "an adult male speaks and dials a rotary phone"], "sample_ids": ["vBslzh7saPw", "tK4VlLsNxak"], "start_seconds": ["90", "120"], "properties": ["engine, spools, takes", "An adult male speaks, dials, and speaks into a rotary phone"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking and using a sewing machine"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "people applaud and hoot and chat quietly"], "sample_ids": ["siJFXfGWgDk", "wwyfGO2J4"], "start_seconds": ["50", "90"], "properties": ["a, bird, vehicle", "people, applaud, hoot"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", null], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a large crowd cheers and applauds"], "sample_ids": ["ugHJF0hfYkg", "rqfQRErjfk8"], "start_seconds": ["10", "170"], "properties": ["engine, running, continuously", "crowd, cheers, applauds"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man hugging another man in front of an orchestra"], "captions_pred_audio": ["a helicopter is flying overhead ", "a crowd of people clapping and cheering"], 
"question": "which entity is a human activity", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "a man speaks as a car is passing by"], "sample_ids": ["tgbONvsP47Y", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["noise, truck, accelerate", "a, car, pass"], "captions_pred_video": ["footage of a fire truck entering a garage", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car is driving on the road ", "a man is speaking with background noise and breathing sounds "], "question": "which is not a vehicle", "label": 1}, {"captions": ["white noise and birds chirping", "a propeller rotates loudly and intensely"], "sample_ids": ["wRBHTgrbiwg", "ugHJF0hfYkg"], "start_seconds": ["50", "10"], "properties": ["noise, white, chirping", "loud, intense, propeller"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a helicopter is flying overhead "], "question": "which noise is louder", "label": 1}, {"captions": ["a child babbles as a woman speaks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["wEBlkGWVWwE", "wqZ135Ssz0"], "start_seconds": ["260", "60"], "properties": ["a, babble, woman", "two men, woman, birds"], "captions_pred_video": ["shows a person writing on the whiteboard", null], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking and ducks are quacking with wind noise in the background "], 
"question": "which entity has more people", "label": 1}, {"captions": ["an adult woman and an adult man speak", "people applaud and hoot and chat quietly"], "sample_ids": ["zTLVJCo4WEE", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["two people, adult, speak", "people, applaud, hoot"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and crickets chirp", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["xfaoyyzw2WU", "su6FAOcOA8c"], "start_seconds": ["180", "4"], "properties": ["loud, jet engine, roar", "engine, idle, woman"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a woman is speaking and a subway train is moving "], "question": "which engine is quieter", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "an airplane engine spools and people speak"], "sample_ids": ["sjlVMgdGSK0", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["accelerates, vehicle, race car", "airplane, engine, spool"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a jet engine is running and people are talking"], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a vehicle engine runs and wind blows before women yell", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["w5W5Kqtc8E", "wz7N8YRy74I"], "start_seconds": ["100", "30"], "properties": 
["wind, blow, vehicle", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a woman speaks followed by another woman whimpering and speaking"], "sample_ids": ["xBxDz0CFVn0", "xOZfdgAgJ9o"], "start_seconds": ["30", "40"], "properties": ["wind, chatter, people", "woman, whimpering, speaking"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a woman talking to a man in a doctor's office"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wsHBIgzs9Fs", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["horn, continuous, buzzing", "stream, water, flow"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "footage is blurry and out of focus"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a man is speaking with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a drill runs and two people laugh"], "sample_ids": ["u7C-AEBQM", "tEE3MpBt1sg"], "start_seconds": ["30", "50"], "properties": ["ticks, rhythmic, quiet", "two people, laugh, drill"], "captions_pred_video": [null, "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a ticktock of a clock", "people are laughing breathing 
and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "an airplane flies overhead as a woman speaks"], "sample_ids": ["vh30P49Po6s", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["loud, continuous, quacks", "airplane, fly, overhead"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a duck is quacking loudly", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying overhead", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a man speaks with another voice speaking in the background"], "sample_ids": ["su6FAOcOA8c", "u21-Z5gJCB8"], "start_seconds": ["4", "30"], "properties": ["engine, idle, woman", "background, voice, man"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a church bell rings several times", "a child speaks in closed space"], "sample_ids": ["sUVVjE3Ucp8", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["ring, bell, several", "child, space, speak"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a church bell is ringing ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "a car speeding up in 
the distance"], "sample_ids": ["tIY7qOV3rEM", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "distance, car, speed"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a baby cries and a woman moans", "paper is crumpling consistently"], "sample_ids": ["smDKStoHBJo", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["a, cry, woman", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["an engine runs and a man speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yT5WfYMRr-U", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["engine, run, man", "a woman, something, fried"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["xvDdE3zNf8Y", "sLUnaPT5gM8"], "start_seconds": ["120", "0"], "properties": ["a, female, speaks", "loud, laughter, intermittent"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman speaks 
and crumples paper", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a piece of wood is being placed down and sawed", "an infant crying and a woman speaking with some distant murmuring"], "sample_ids": ["uiItxDsDMFI", "smDKStoHBJo"], "start_seconds": ["30", "0"], "properties": ["wood, piece, saw", "a, infant, speaking"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "a man holding a crying baby in his arms"], "captions_pred_audio": ["a saw is being used with background noise ", "a baby is crying and a woman is speaking"], "question": "which entity is a person", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zgUgkpk78xU", "wDVMhEdTiVw"], "start_seconds": ["70", "30"], "properties": ["horn, bells, ring", "gun, shoot, water"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a warning", "label": 0}, {"captions": ["a man speaks then multiple motorcycles pass by", "water splashes as an animal walks through"], "sample_ids": ["zcDwZ6W7E3E", "w1ir-sZ3Im8"], "start_seconds": ["180", "90"], "properties": ["a, man, speak", "animal, water, splashes"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking while a 
car accelerates and revs its engine ", "water splashes and gurgles as people speak"], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["water flows as a woman laughs and a man speaks", "some men converse over an engine running"], "sample_ids": ["vddP56-ogds", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["water, flow, laugh", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows a man speaking?", "label": 0}, {"captions": ["someone whistles briefly", "a car speeding up in the distance"], "sample_ids": ["uFoga8sHpiw", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["sound, duration, pitch", "distance, car, speed"], "captions_pred_video": ["footage of a bird in a cage", null], "captions_pred_audio": ["a person whistles a song", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "vehicles pass by on a roadway"], "sample_ids": ["zl9Dqx-j7q4", "tgbONvsP47Y"], "start_seconds": ["6", "0"], "properties": ["motors rev, laugh, loudly", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a jet engine roars ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "some men converse over an engine running"], "sample_ids": ["zofjfKhqLk8", "sCiy7QS1U"], "start_seconds": ["10", "300"], "properties": ["background, metal, clings", "men, converse, engine"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", 
"a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a machine?", "label": 0}, {"captions": ["a horn blasts loudly as a train passes", "a man speaks as a vehicle engine idles"], "sample_ids": ["zsLxS-uLJTw", "shmR4OZtzqA"], "start_seconds": ["20", "30"], "properties": ["horn, blast, train", "man, engine, idle"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a man speaks while a motor runs"], "question": "which entity is stationary", "label": 1}, {"captions": ["water flows followed by women screaming", "water is sprayed across a hard surface"], "sample_ids": ["w5W5Kqtc8E", "sQwlkXjQabo"], "start_seconds": ["100", "10"], "properties": ["water, flow, women", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "spraying followed by silence"], "question": "which entity is a video of water flowing?", "label": 0}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "water is sprayed across a hard surface"], "sample_ids": ["y8WEcpOlT3I", "sQwlkXjQabo"], 
"start_seconds": ["40", "10"], "properties": ["harsh, wind, blows", "water, spray, surface"], "captions_pred_video": ["on how to use a sewing machine youtube", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a clock ticktocks briefly", "a man is snoring loudly and repeatedly"], "sample_ids": ["u7C-AEBQM", "sncRqQ67iJU"], "start_seconds": ["30", "460"], "properties": ["ticktocks, clock, ticktocks briefly", "loud, repeatedly, man"], "captions_pred_video": [null, "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a ticktock of a clock", "a person is snoring"], "question": "which entity is louder", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "multiple people speak and children yell while water gurgles"], "sample_ids": ["rqu8iB22IY", "vb1fPSDI4c"], "start_seconds": ["5", "30"], "properties": ["sound, repeats, laugh", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking?", "label": 1}, {"captions": ["a woman speaks followed by clicks and scraping", "a fly buzzes around loudly as birds chirp"], "sample_ids": ["yYJksgsxx5U", "uJV8NDaHqqk"], "start_seconds": ["30", "100"], "properties": ["audio, clicks, scraping", "loud, fly, chirp"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "a bee hive in a wooden box"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a swarm of bees buzzing around"], "question": "which entity is louder", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "a propeller moves loudly nearby"], "sample_ids": ["sHbXC6na9hg", 
"ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["a person, saw, wood", "loud, propeller, move"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["an engine is idling and vibrating", "a helicopter is flying overhead "], "question": "which entity is moving", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a toilet flushes and water drains"], "sample_ids": ["u--KhUW8l1Y", "sfAvvZwdLCY"], "start_seconds": ["0", "20"], "properties": ["horn, siren, life", "water drains, flushes, water"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a person speaks as a cage rattles, birds chips and flap wings in the background", "birds chirp and a man speaks"], "sample_ids": ["v0wPrLBI3hg", "zuua6-5goWw"], "start_seconds": ["30", "30"], "properties": ["background, person, cage", "chirp, speak, bird"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "birds are chirping and a man is speaking with background 
noise "], "question": "which entity has a person speaking and birds chirping?", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "birds chirp and objects are moved around"], "sample_ids": ["xKB8O8LTs6s", "yPUYU6t3rwo"], "start_seconds": ["70", "370"], "properties": ["music, gunfire, explosion", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "insects buzz and a man speaks"], "question": "which entity is more calm", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["wTideSjRFS0", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["food, sizzle, woman", "engine, revs, vehicle"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a race car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a duck quacks continuously"], "sample_ids": ["vddP56-ogds", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["water, splash, person, laugh", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["speaking following by laughing and clapping", "wind blows as people chatter quietly"], "sample_ids": ["u2f5NpsoHBg", "xBxDz0CFVn0"], 
"start_seconds": ["30", "30"], "properties": ["person, laugh, clap", "wind, chatter, people"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["spYNpeN7rPY", "uEU-Hg5MTN8"], "start_seconds": ["1", "27"], "properties": ["a clock, ticktock, man", "animal, grunts, snorts"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity has a clock ticking?", "label": 0}, {"captions": ["music plays and animals 
vocalize as a cartoon character makes sounds", "a duck quacks continuously"], "sample_ids": ["weDbePuc-Xc", "vh30P49Po6s"], "start_seconds": ["40", "30"], "properties": ["cartoon character, music, vocalize", "quacks, continuously, duck"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a duck is quacking loudly"], "question": "which entity is a duck?", "label": 1}, {"captions": ["bees buzz and wind blows", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["tMJne1a4AFI", "tw76HGONaKg"], "start_seconds": ["0", "570"], "properties": ["bees buzz, wind blows, bees", "A, game, keyboard"], "captions_pred_video": ["a swarm of bees on the ground", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man speaks and types on a computer keyboard "], "question": "which entity is not a video game?", "label": 0}, {"captions": ["a drill runs and two people laugh", "a clock ticktocks"], "sample_ids": ["tEE3MpBt1sg", 
"v-g-j2uTByM"], "start_seconds": ["50", "30"], "properties": ["two people, laugh, drill", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "people applaud and hoot and chat quietly"], "sample_ids": ["tZGN5a7ybxo", "wwyfGO2J4"], "start_seconds": ["60", "90"], "properties": ["ring, train, horn", "people, applaud, hoot"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", null], "captions_pred_audio": ["a train is moving and blowing its horn ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["tQWGZLItBXk", "tw76HGONaKg"], "start_seconds": ["170", "570"], "properties": ["music, kid, speak", "A, game, keyboard"], "captions_pred_video": ["worms revolution screenshots", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of 
time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man speaks and types on a computer keyboard "], "question": "which entity has a keyboard?", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["tDVADusiIoc", "x9JovgqUcs"], "start_seconds": ["60", "500"], "properties": ["water, radio, man", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man speaks and types on a keyboard"], "question": "which entity is indoors", "label": 1}, {"captions": ["a man talks as several small engines run", "a duck quacks continuously"], "sample_ids": ["u9A6VZQCZpU", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["ul60S8TXDA8", "tDVADusiIoc"], "start_seconds": ["60", "60"], "properties": ["sound, distance, bell", "water, radio, man"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking 
over a radio?", "label": 1}, {"captions": ["a weapon fires multiple times", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["sMC07Ucy7kg", "y2bVZ7rz-5M"], "start_seconds": ["10", "280"], "properties": ["weapon, fire, multiple", "motor noise, horn, siren"], "captions_pred_video": ["footage is from a car's point of view", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is not a weapon?", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "wind blows and women speak as livestock vocalizes"], "sample_ids": ["xjvTpk2Zpr8", "vXlk0lIQBFo"], "start_seconds": ["70", "470"], "properties": ["wind, blows, vehicle", "wind, speak, vocalize"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "- a woman and two donkeys in a fenced in area"], "captions_pred_audio": ["a jet engine roars and wind blows ", "wind chimes are ringing and people are speaking and laughing "], "question": "which entity is more calm", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "wind blows as people chatter quietly"], "sample_ids": ["zofjfKhqLk8", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["noise, stop, motor", "wind, chatter, people"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage is blurry and out of focus"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vf44CgrjT0A", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["loud, long, person", "music, gunfire, 
explosion"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a loud burp", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a door slams shut roughly", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zkKdxzNC97Y", "vb1fPSDI4c"], "start_seconds": ["27", "30"], "properties": ["a door, slams, shut", "multiple, people, yell"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a woman speaks happily and an animal chirps"], "sample_ids": ["tQWGZLItBXk", "uWAAAL4CIoc"], "start_seconds": ["170", "0"], "properties": ["voice, music, whoosh", "a woman, chirps, animal"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman is speaking and a dog is barking "], "question": "which entity has a woman speaking and an animal chirps?", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "some men converse over an engine running"], "sample_ids": ["s4Uz1Ffgo04", "sCiy7QS1U"], "start_seconds": ["100", "300"], "properties": ["water, rushes, vehicle", "men, converse, engine"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video of a vehicle zooming past?", "label": 0}, 
{"captions": ["multiple motorcycles pass by as a man speaks", "several insects fly while two men talk"], "sample_ids": ["zcDwZ6W7E3E", "s-T9OVOiMLo"], "start_seconds": ["180", "330"], "properties": ["man, speak, motorcycles", "several, fly, men"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a man speaking to multiple motorcycles?", "label": 0}, {"captions": ["water pouring and bubbling", "winds blows roughly as a vehicle races past"], "sample_ids": ["uyRfq-jKPpo", "xjvTpk2Zpr8"], "start_seconds": ["50", "70"], "properties": ["water, bubbles, pouring", "wind, blows, vehicle"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["water is running from a faucet", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a diesel truck engine runs while wind blows", "a man speaks as a car is passing by"], "sample_ids": 
["xyL9F5VrjkE", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["engine, run, wind", "a, car, pass"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking with background noise and breathing sounds "], "question": "which is not a vehicle", "label": 1}, {"captions": ["a woman and man speak while food is frying", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zk-xJGQU8-4", "wqZ135Ssz0"], "start_seconds": ["130", "60"], "properties": ["food, man, woman", "two men, woman, birds"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", null], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["people speak then an engine runs", "a woman speaks happily and an animal chirps"], "sample_ids": ["uMTTDZ2mb4", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["engine, run, people", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a woman is speaking and a dog is barking "], "question": "which entity is about a woman speaking and an animal chirping?", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "vehicles 
pass by on a roadway"], "sample_ids": ["tqR406bGiE", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["flush, water, gurgle", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a toilet is flushed", "a car is driving on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a dog barks and whimpers", "water flows and trickles"], "sample_ids": ["sShpyu2l4YQ", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "water, flow, trickle"], "captions_pred_video": ["the puppies are playing with a toy", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a dog is barking and growling", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["w2JXXIAdUdg", "wDVMhEdTiVw"], "start_seconds": ["10", "30"], "properties": ["snoring, distance, person", "gun, shoot, water"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause injury", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "wind blows as people chatter quietly"], "sample_ids": ["w2M4i1mklOA", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["loud, chime, bell", "wind, chatter, people"], "captions_pred_video": ["footage of an antique clock", "footage is blurry and out of focus"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking with wind noise in the background "], "question": 
"which entity is quieter", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "water is sprayed across a hard surface"], "sample_ids": ["su6FAOcOA8c", "sQwlkXjQabo"], "start_seconds": ["4", "10"], "properties": ["engine, run, woman", "water, spray, surface"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "a child speaks in closed space"], "sample_ids": ["tPJvjq9QePY", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["animal, bleat, moo", "child, space, speak"], "captions_pred_video": ["a dog and a sheep in a barn", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a baby cries and a man speaks", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vSeGhaZt-aI", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["water, bubbles, run", "a woman, something, fried"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["wyllXV6PjKo", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["a kid, talk, cry", 
"sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a woman speaks and a baby cries", "a goat bleats and birds chirp"], "question": "which entity is about animals?", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["y2ZBGpgbhHM", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["dog, chirp, breathe", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking with wind noise in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "people cheer as a vehicle engine revs"], "sample_ids": ["vuUVPzd2FXw", "xjhAnI2q6hM"], "start_seconds": ["160", "6"], "properties": ["a, steam, release", "engine revs, vehicle, people"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "a child speaks in closed space"], "sample_ids": ["y8WEcpOlT3I", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["harsh, wind, blows", "child, space, speak"], "captions_pred_video": ["on how to use a sewing machine youtube", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a woman speaks as 
she rubs two objects together", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vzxHnu-SFEw", "ukg5L09Wpvo"], "start_seconds": ["80", "150"], "properties": ["two objects, woman, speak", "clickety-clack, train, whistle"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["goats bleat and metal clings", "someone is typing on a computer keyboard"], "sample_ids": ["tH17JPjDPnc", "v0x1odnXtP0"], "start_seconds": ["260", "210"], "properties": ["bleat, metal, clings", "keyboard, type, computer"], "captions_pred_video": ["feed of the goats eating hay in the barn", "how to make money on youtube in spanish"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": 
["a dog barks and whimpers", "people speak in a closed space"], "sample_ids": ["sShpyu2l4YQ", "sTpirNYo8vQ"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "people, space, speak"], "captions_pred_video": ["the puppies are playing with a toy", "of a man taking a selfie on a bus"], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking while a car is revving and accelerating "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["an engine revs and a turning noise is made", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tOSWIURC-4", "vYkA3cfXp5Q"], "start_seconds": ["0", "30"], "properties": ["noise, engine, revs", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a lawn mower is running ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a man speaks as a car is passing by"], "sample_ids": ["yZmhM1HcsyE", "sK4u5T8hW78"], "start_seconds": ["4", "30"], "properties": ["engine, roar, water", "a, car, pass"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["people speak then an engine 
runs", "people cheer as a vehicle engine revs"], "sample_ids": ["uMTTDZ2mb4", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["engine, run, people", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity shows people speaking?", "label": 0}, {"captions": ["dogs bark as an engine runs and a person whistles", "people cheer as a vehicle engine revs"], "sample_ids": ["zY3icUyMdh8", "xjhAnI2q6hM"], "start_seconds": ["20", "6"], "properties": ["dog, bark, engine", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a truck is revving its engine and a man is speaking "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a man speaks as a car is passing by"], "sample_ids": ["w0xsN8X18Y", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["music, surface, rain", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man is speaking with background noise and breathing 
sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["birds fly and flutter around", "water splashing and a person laughs in the distance then a man speaks nearby"], "sample_ids": ["wGKgwOP3h30", "vddP56-ogds"], "start_seconds": ["30", "30"], "properties": ["fly, flutter, around", "water, splash, person, laugh"], "captions_pred_video": ["of the pigeons in the coop", null], "captions_pred_audio": ["pigeons coo and flap their wings", "water is running and gurgling and a man is speaking"], "question": "which entity is more active", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "a man sprays as a scraping occurs in the background"], "sample_ids": ["y4tPJXBKDig", "sOa7g-44Dag"], "start_seconds": ["20", "30"], "properties": ["a, noise, talk", "background, man, spray"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a man is speaking and rubbing his hands together "], "question": "which entity is a person", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a man speaks followed by another man speaking outside"], "sample_ids": ["zcDwZ6W7E3E", "viuTg1M-dqg"], "start_seconds": ["180", "30"], "properties": ["man, speak, motorcycles", "two men, speak, follow"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "multiple people speak and children yell while 
water gurgles"], "sample_ids": ["xV7Mg1QucSc", "vb1fPSDI4c"], "start_seconds": ["14", "30"], "properties": ["alarm, ticktocks, laughs", "multiple, people, yell"], "captions_pred_video": ["a cuckoo clock hanging on the wall", null], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "pigeons vocalize and birds chirp"], "sample_ids": ["yYEVLuqEytU", "uiS58TNyUiw"], "start_seconds": ["40", "430"], "properties": ["animal, pig, background", "vocalize, bird, chirp"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "of the pigeon in the cage"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["an insect buzzes around continuously", "an infant crying as a woman laughs"], "sample_ids": ["v25l1jef3JY", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["buzzes, continuously, insect", "a, laugh, infant"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a baby cries and a woman speaks"], "question": "which entity is not a person", "label": 0}, {"captions": ["a man speaks followed by another man speaking outside", "water flows and trickles"], "sample_ids": ["viuTg1M-dqg", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["two men, speak, follow", "water, flow, trickle"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", 
"water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a drill runs and two people laugh", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tEE3MpBt1sg", "vfYTJq7nU"], "start_seconds": ["50", "130"], "properties": ["two people, laugh, drill", "rustling, ducks, quack"], "captions_pred_video": ["footage is blurry due to the smoke in the air", null], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a duck quacks and a woman speaks"], "question": "which entity is about a drill?", "label": 0}, {"captions": ["a toilet flushes and water sputters as it drains", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["smGI3C1NZc", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["water, drain, toilet", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and a dog is whimpering"], "question": "which entity is a video of a toilet?", "label": 0}, {"captions": ["someone sprays liquid onto a hard surface", "vehicles pass by on a roadway"], "sample_ids": ["sQwlkXjQabo", "tgbONvsP47Y"], "start_seconds": ["10", "0"], "properties": ["liquid, surface, spray", "pass, vehicle, roadway"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage of a fire truck entering a garage"], "captions_pred_audio": ["spraying followed by silence", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks as a car is passing by", "an infant crying as a woman laughs"], "sample_ids": ["sK4u5T8hW78", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["a, car, pass", "a, laugh, infant"], "captions_pred_video": ["for 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["w5W5Kqtc8E", "vlS6YMeWAPo"], "start_seconds": ["100", "40"], "properties": ["water, splashes, motorboat", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a machine runs continuously", "wind blowing followed by a zoom"], "sample_ids": ["wdXV3Pv0jiY", "vr8ZXjEBhMQ"], "start_seconds": ["11", "150"], "properties": ["machine, running, continuously", "wind, blow, zoom"], "captions_pred_video": ["footage is blurry and shaky", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not running continuously", "label": 1}, {"captions": ["scraping and female speech with distant music", "a woman speaks and is crumpling paper"], "sample_ids": ["yHeVV-xeOxQ", "xvDdE3zNf8Y"], "start_seconds": ["130", "120"], "properties": ["female, speech, music", "A, crumple, paper"], "captions_pred_video": ["of a girl milking a goat's udder", "of a 
woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a woman speaks and crumples paper"], "question": "which entity is a woman speaking?", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "rain falls on a surface as men speak and music plays"], "sample_ids": ["spYNpeN7rPY", "w0xsN8X18Y"], "start_seconds": ["1", "30"], "properties": ["a clock, ticktock, man", "music, surface, rain"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", null], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking while a motorboat is moving in the background "], "question": "which entity has a clock ticking?", "label": 0}, {"captions": ["a man speaks as bees buzz and birds chirp", "a person snores loudly multiple times at a close distance"], "sample_ids": ["t25U-v4k4ts", "sSMl2vc3ek"], "start_seconds": ["40", "20"], "properties": ["bees 
buzz, birds chirp, man speaks", "loud, multiple, distance"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", null], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["electronic beeps occur in a short series", "a clock ticktocks"], "sample_ids": ["y682ml90jGw", "v-g-j2uTByM"], "start_seconds": ["11", "30"], "properties": ["beeps, series, electronic", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a beeping sound is being made ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["vW4x7S1VfQc", "tw76HGONaKg"], "start_seconds": ["150", "570"], "properties": ["clacking, oil, woman", "A, game, keyboard"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["food sizzles in a frying 
pan", "a man speaks and types on a computer keyboard "], "question": "which entity is a video of a man playing a video game?", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wudZTNBtVqc", "wz7N8YRy74I"], "start_seconds": ["60", "30"], "properties": ["accelerates, engine, wind", "rooster, crow, background, men"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "an airplane engine spools and people speak"], "sample_ids": ["yNtRmrn0io8", "wTjoRj1se3U"], "start_seconds": ["210", "390"], "properties": ["storm, distance, strike", "airplane, engine, spool"], "captions_pred_video": ["footage of a house in the middle of the night", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["rain falls and thunder roars", "a jet engine is running and people are talking"], "question": "which is a moving object", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a telephone rings followed by a woman talking"], "sample_ids": ["vSeGhaZt-aI", "tGcFnX0GHI"], "start_seconds": ["50", "0"], "properties": ["water, sink, talk", "ring, talk, woman"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "birds chirp and an owl 
hoots before a man speaks briefly"], "sample_ids": ["wvKpEYswXO0", "wRBHTgrbiwg"], "start_seconds": ["150", "50"], "properties": ["water, tap, run", "bird, owl, speak"], "captions_pred_video": ["of the person preparing food in the kitchen", "of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "birds are chirping and insects are buzzing"], "question": "which entity has more birds", "label": 1}, {"captions": ["multiple ducks quack continuously", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wfHeoPDLMaM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["multiple, quack, continuously", "stream, water, flow"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage is blurry and out of focus"], "captions_pred_audio": ["ducks are quacking", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["weDbePuc-Xc", "yDoT73BWsdA"], 
"start_seconds": ["40", "10"], "properties": ["music, slaps, human", "engine, revs, vehicle"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["tgbONvsP47Y", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["noise, truck, accelerate", "engine, idle, woman"], "captions_pred_video": ["footage of a fire truck entering a garage", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a car is driving on the road ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xyL9F5VrjkE", "yajyRTUQk3U"], "start_seconds": ["20", "400"], "properties": ["wind, motor, distance", "a woman, something, fried"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "- a woman cooking in the kitchen"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a duck quacks several times", "a train horn blows as it passes by"], "sample_ids": ["vh30P49Po6s", "zVacuqSb4LI"], "start_seconds": ["30", "30"], "properties": ["quacks, duck, several", "horn, blows, train"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "by mike amstong a video by mike amstong a 
video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a duck is quacking loudly", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["wind blows strongly", "a clock ticktocks"], "sample_ids": ["w8uLijTqtlU", "v-g-j2uTByM"], "start_seconds": ["70", "30"], "properties": ["wind, blows, strongly", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage is blurry and shaky", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["the wind is blowing strongly", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "paper is crumpling consistently"], "sample_ids": ["w2JXXIAdUdg", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["snoring, distance, person", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a person snoring and a dog whimpering", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a drill runs and two people laugh", "a 
horn rings out as a machine runs by"], "sample_ids": ["tEE3MpBt1sg", "slZLHwNbbt4"], "start_seconds": ["50", "300"], "properties": ["two people, laugh, drill", "a, horn, run"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a man speaks while a machine runs before a smoke alarm beeps"], "sample_ids": ["siJFXfGWgDk", "sG7TyPnFDR0"], "start_seconds": ["50", "180"], "properties": ["a, bird, vehicle", "beeps, machine, smoke alarm"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a person is using an espresso machine in a restaurant"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking and a microwave oven is beeping "], "question": "which entity has a vehicle passing nearby?", "label": 0}, {"captions": ["a male speaks and another male speaks", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["viuTg1M-dqg", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["two males, speaking, male", "animal, grunts, snorts"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity has a more snorts", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["zuua6-5goWw", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["birds, chirp, 
quiet, man, speaks", "men, talk, cars"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "pigeons vocalize and birds chirp"], "sample_ids": ["zkKdxzNC97Y", "uiS58TNyUiw"], "start_seconds": ["27", "430"], "properties": ["loud, bang, noise", "vocalize, bird, chirp"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of the pigeon in the cage"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a male is 
speaking and a duck quacks as others laugh", "an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background"], "sample_ids": ["tiDFTC-5vU", "yYEVLuqEytU"], "start_seconds": ["30", "40"], "properties": ["male, duck, laugh", "animal, pig, background"], "captions_pred_video": [null, "a baby goat is being petted by a person's hand"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "several sheep bleat and a man speaks"], "question": "which entity has a pig in it?", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "birds chirp and objects are moved around"], "sample_ids": ["yYEVLuqEytU", "yPUYU6t3rwo"], "start_seconds": ["40", "370"], "properties": ["animal, pig, background", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["several sheep bleat and a man speaks", "insects buzz and a man speaks"], "question": "which entity has more birds", "label": 1}, {"captions": ["a child speaks in closed space", "multiple people speak and children yell while water gurgles"], "sample_ids": ["yW6FWLSLkx4", "vb1fPSDI4c"], "start_seconds": ["40", "30"], "properties": ["child, space, speak", "multiple, people, yell"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", null], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a duck quacks continuously"], "sample_ids": ["wy1eKjR7KC0", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["people, talk, distance", "quacks, continuously, 
duck"], "captions_pred_video": ["two police officers riding motorcycles down the street", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["s4Uz1Ffgo04", "zFjIWfSD-4"], "start_seconds": ["100", "410"], "properties": ["roars, background, people speaking", "People, motor, brakes"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["an small aircraft engine runs and a boy speaks", "an airplane engine runs"], "sample_ids": ["xSKJGCItUWE", "yVPZ2MNWpms"], "start_seconds": ["10", "0"], "properties": ["engine, run, boy", "engine, airplane, runs"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a car is driving by on the road "], "question": "which entity has a boy speaking?", "label": 0}, {"captions": ["a car accelerates and wind blows", "a man talks while a clock does ticktock"], "sample_ids": ["u0TrcHhkPQ", "spYNpeN7rPY"], "start_seconds": ["20", "1"], "properties": ["accelerates, wind, blows", "a clock, ticktock, man"], "captions_pred_video": [null, "in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and breathing with background noise "], "question": "which entity is stationary", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a stream of water runs briefly"], "sample_ids": ["ugHJF0hfYkg", "x-PeY8Yb8M4"], "start_seconds": ["10", "300"], "properties": ["engine, running, continuously", "stream, water, run"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car is driving on a wet road "], "question": "which entity is running continuously", "label": 0}, {"captions": ["a woman talking as an infant is crying", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["tMbMDvT50j8", "y8WEcpOlT3I"], "start_seconds": ["12", "40"], "properties": ["a, talk, infant", "harsh, wind, blows"], "captions_pred_video": ["shows a little girl covering her face with her hands while 
sitting at a table", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is about a woman talking to an infant?", "label": 0}, {"captions": ["a man speaks, then dials a rotary telephone", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["tK4VlLsNxak", "tDVADusiIoc"], "start_seconds": ["120", "60"], "properties": ["a, dial, telephone", "water, radio, man"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "an infant crying frantically"], "sample_ids": ["sjlVMgdGSK0", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["accelerates, vehicle, race car", "cry, infant, frantically"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "of the baby crying in the car seat"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a baby cries loudly"], "question": "which entity is a human", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a clock ticktocks"], "sample_ids": ["vbr9mHKc8WM", "v-g-j2uTByM"], "start_seconds": ["40", "30"], "properties": ["noise, loudness, engine", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["an engine is idling", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "pigeons vocalize and birds chirp"], 
"sample_ids": ["vSeGhaZt-aI", "uiS58TNyUiw"], "start_seconds": ["50", "430"], "properties": ["water, bubbles, speak", "vocalize, bird, chirp"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a male speaks and another male speaks", "water flows and trickles"], "sample_ids": ["viuTg1M-dqg", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["two males, speaking, male", "water, flow, trickle"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zALy31PjDl0", "uYT5gxnyMWM"], "start_seconds": ["21", "50"], "properties": ["a man, a vehicle, a horn", "female, spraying, scream"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a woman is speaking and a baby is crying"], "question": "which entity is about a vehicle?", "label": 0}, {"captions": ["a child speaks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["yW6FWLSLkx4", "wqZ135Ssz0"], "start_seconds": ["40", "60"], "properties": ["a, child, speaks", "two men, woman, birds"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", null], "captions_pred_audio": ["a woman is 
speaking with background noise and breathing sounds ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a drill runs and two people laugh", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tEE3MpBt1sg", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["two people, laugh, drill", "a woman, something, fried"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "- a woman cooking in the kitchen"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["someone snores nearby", "people speak softly as food sizzles"], "sample_ids": ["spJCm8tD9Zo", "yhQ2Lg-7qDY"], "start_seconds": ["90", "130"], "properties": ["someone snores, nearby, someone", "food, sizzle, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a person is snoring loudly", "a faucet is running and a man is speaking"], "question": "which entity is more active", "label": 1}, {"captions": ["an electronic device bleeps once", "an airplane engine roars increasingly louder"], "sample_ids": ["tHJ6JSa8Y4", "vBslzh7saPw"], "start_seconds": ["0", "90"], "properties": ["bleeps, electronic, device", "engine, roar, louder"], "captions_pred_video": [null, "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a clock is ticking and beeping", "a jet engine roars and accelerates "], "question": "which entity is louder", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["tOSWIURC-4", "wyllXV6PjKo"], "start_seconds": ["0", "30"], "properties": 
["engine, work, nearby", "a baby, a woman, a man"], "captions_pred_video": [null, null], "captions_pred_audio": ["a lawn mower is running ", "a woman speaks and a baby cries"], "question": "which entity is a person", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vXlk0lIQBFo", "xBxDz0CFVn0"], "start_seconds": ["470", "30"], "properties": ["wind, talk, vocalize", "stream, water, flow"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage is blurry and out of focus"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing as people talk and wind blows?", "label": 1}, {"captions": ["material crumbles into a microphone", "a car speeds away loudly followed by a car revving loudly and driving away while outside"], "sample_ids": ["vofpvUo6NAw", "sjlVMgdGSK0"], "start_seconds": ["220", "30"], "properties": ["material, crumbles, microphone", "car, revving, loudly"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a toilet flushes and water drains", "water splashes as an animal walks through"], "sample_ids": ["sfAvvZwdLCY", "w1ir-sZ3Im8"], "start_seconds": ["20", "90"], "properties": ["water drains, flushes, water", "animal, water, splashes"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a toilet is flushed", "water splashes and gurgles as people speak"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a 
stream of water flows quickly", "an audience gives applause"], "sample_ids": ["wbHTKEJZyhc", "x6iCUDmRpKQ"], "start_seconds": ["20", "38"], "properties": ["stream, water, flow", "applause, audience, give"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "a black background with the moon and stars in the sky"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a group of people are clapping and cheering"], "question": "which entity is a human activity", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vcmWSmvti8", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["music, man, fire", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], 
"captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in it?", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a clicking followed by some people laughing and a kid speaking"], "sample_ids": ["ugHJF0hfYkg", "vz8868znkVQ"], "start_seconds": ["10", "60"], "properties": ["loud, intense, propeller", "audio, click, kid speaking"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a video of a plane flying over a cloudy sky"], "captions_pred_audio": ["a helicopter is flying overhead ", "a baby is laughing and breathing with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a stream of water runs briefly", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["x-PeY8Yb8M4", "yDoT73BWsdA"], "start_seconds": ["300", "10"], "properties": ["stream, water, run", "engine, revs, vehicle"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a car is driving on a wet road ", "a race car accelerates and revs its engine "], "question": "which entity is a moving object", "label": 1}, {"captions": ["an engine runs and a man speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["yT5WfYMRr-U", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["engine, run, man", "engine revs, vehicle, people"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["loud clanking and banging with brief male speech", "water 
splashes as an animal walks through"], "sample_ids": ["sWZzXuWYY", "w1ir-sZ3Im8"], "start_seconds": ["420", "90"], "properties": ["male, speech, banging", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "water splashes and gurgles as people speak"], "question": "which entity is more quiet", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "people speak as gunfire rings out"], "sample_ids": ["vdoxuJn9lTc", "wqTCwqVRDlk"], "start_seconds": ["40", "80"], "properties": ["burp, loud, girl", "gunfire, ring, speak"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a infant makes noise and is excited"], "sample_ids": ["u--KhUW8l1Y", "wIJK3-5y0kA"], "start_seconds": ["0", "30"], "properties": ["horn, siren, life", "noise, excited, infant"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["y2bVZ7rz-5M", "tdWhHV3X25Q"], "start_seconds": ["280", "60"], "properties": ["motor noise, horn, siren", "applause, audience, yells"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "a man is talking to another man on a stage in front of a microphone"], 
"captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking and a crowd is clapping"], "question": "which entity is a response to a performance", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "paper folding and crinkling"], "sample_ids": ["uZesmtKZGSw", "zPpG3RD8lSs"], "start_seconds": ["250", "20"], "properties": ["men, talk, cars", "paper, fold, crinkle"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "the wind blows and a mouse clicks "], 
"question": "which entity is more quiet", "label": 1}, {"captions": ["several insects fly while two men talk", "a horse runs while two women talk"], "sample_ids": ["s-T9OVOiMLo", "sdvI1mHAsc"], "start_seconds": ["330", "20"], "properties": ["several, fly, men", "two women, horse, run"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", null], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "horses clip-clop and a woman speaks"], "question": "which entity is a horse?", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wnpJndXuxLc", "wz7N8YRy74I"], "start_seconds": ["50", "30"], "properties": ["blows, vehicle, train", "rooster, crow, background, men"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "people cheer as a vehicle engine revs"], "sample_ids": ["zkKdxzNC97Y", "xjhAnI2q6hM"], "start_seconds": ["27", "6"], "properties": ["hard, surface, door", "engine revs, vehicle, people"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a door is opened and closed", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "a woman speaks as she rubs two objects together"], "sample_ids": 
["x6ijhqRY38s", "vzxHnu-SFEw"], "start_seconds": ["250", "80"], "properties": ["bowl, silverware, man", "two objects, woman, speak"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which object is being moved in the bowl", "label": 0}, {"captions": ["a clang followed by a toilet flushing", "water pouring and bubbling"], "sample_ids": ["wNZ5thZM7XU", "uyRfq-jKPpo"], "start_seconds": ["20", "50"], "properties": ["sound, flush, toilet", "water, bubbles, pouring"], "captions_pred_video": ["footage of a toilet in a bathroom stall", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a toilet flushes", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "a stream of water flows as people talk and wind blows"], "sample_ids": ["voJh2gJxXhA", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["music, frog, croak", "stream, water, flow"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "footage is blurry and out of focus"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a man is speaking with wind noise in the background "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["sapQIQUhFc", "uEU-Hg5MTN8"], "start_seconds": ["280", "27"], "properties": ["liquid, flow, distance", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a man talking nearby and another man talking far away?", "label": 0}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["su6FAOcOA8c", "tdWhHV3X25Q"], "start_seconds": ["4", "60"], "properties": ["engine, run, woman", "applause, audience, 
yells"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zj2R0XoFr5k", "wDVMhEdTiVw"], "start_seconds": ["50", "30"], "properties": ["airplane, boy, fly", "gun, shoot, water"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["birds chirp and wind blows", "a vehicle accelerates before a race car idles then accelerates quickly"], "sample_ids": ["sxIvBMSavMQ", "sjlVMgdGSK0"], "start_seconds": ["210", "30"], "properties": ["birds, chirp, wind", "accelerates, vehicle, race car"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to 
extract honey from a", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "an insect buzzes around continuously"], "sample_ids": ["w0xsN8X18Y", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["music, surface, rain", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a man speaks followed by another man speaking outside"], "sample_ids": ["zofjfKhqLk8", "viuTg1M-dqg"], "start_seconds": ["10", "30"], "properties": ["noise, stop, motor", "two men, speak, follow"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "speaking following by laughing and clapping"], "sample_ids": ["slZLHwNbbt4", "u2f5NpsoHBg"], "start_seconds": ["300", "30"], "properties": ["train, horn, sound", "person, laugh, clap"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "is being projected on a screen at the front of the stage"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a woman is speaking and a crowd is clapping"], "question": "which entity is a person", "label": 1}, {"captions": 
["an aircraft engine runs as people speak", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wTjoRj1se3U", "tdWhHV3X25Q"], "start_seconds": ["390", "60"], "properties": ["engine, run, people", "applause, audience, yells"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a jet engine is running and people are talking", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["frogs croak and vocalize", "a man speaks as a car is passing by"], "sample_ids": ["yswmmRZFItk", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["croak, vocalize, frog", "a, car, pass"], "captions_pred_video": ["a close up of a frog in the water", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a frog is croaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is not a frog?", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a woman speaks as she rubs two objects together"], "sample_ids": ["weDbePuc-Xc", "vzxHnu-SFEw"], "start_seconds": ["40", "80"], "properties": ["music, slaps, human", "two objects, woman, speak"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to 
make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "people speak as gunfire rings out"], "sample_ids": ["smDKStoHBJo", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["a, infant, speaking", "gunfire, ring, speak"], "captions_pred_video": ["a man holding a crying baby in his arms", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking and a gun is fired"], "question": "which entity is about a woman speaking to an infant?", "label": 0}, {"captions": ["a woman talks while a baby cries and a man whispers", "a man speaks followed by another man speaking outside"], "sample_ids": ["smDKStoHBJo", "viuTg1M-dqg"], "start_seconds": ["0", "30"], "properties": ["a, talk, baby, cry", "two men, speak, follow"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a baby is crying 
and a woman is speaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a baby?", "label": 0}, {"captions": ["a duck quacks loudly and continuously", "a man speaks followed by another man speaking outside"], "sample_ids": ["vh30P49Po6s", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["loud, continuous, quacks", "two men, speak, follow"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is quieter", "label": 1}, {"captions": ["running water in a faucet with some clinks", "a jet engine spools up and takes off"], "sample_ids": ["zNRChLjqcU", "vBslzh7saPw"], "start_seconds": ["220", "90"], "properties": ["water, faucet, run", "engine, spools, takes"], "captions_pred_video": [null, "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["water is running from a faucet into a sink", "a jet engine roars and accelerates "], "question": "which entity is a moving object", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "winds blows roughly as a vehicle races past"], "sample_ids": ["sQGXqGcwOTc", "xjvTpk2Zpr8"], "start_seconds": ["3", "70"], "properties": ["audio, kid, giggles", "wind, blows, vehicle"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "a sheep baa followed by birds chirping and then more sheep baaing"], 
"sample_ids": ["sjlVMgdGSK0", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["car, revving, loudly", "sheep, baa, birds"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a goat bleats and birds chirp"], "question": "which entity is quieter", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "a dog barks and whimpers"], "sample_ids": ["uJV8NDaHqqk", "sShpyu2l4YQ"], "start_seconds": ["100", "0"], "properties": ["loud, fly, chirp", "barks, whimpers, dog"], "captions_pred_video": ["a bee hive in a wooden box", "the puppies are playing with a toy"], "captions_pred_audio": ["a swarm of bees buzzing around", "a dog is barking and growling"], "question": "which entity is quieter", "label": 1}, {"captions": ["a dog barks and whimpers", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sShpyu2l4YQ", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["barks, whimpers, dog", "music, gunfire, explosion"], "captions_pred_video": ["the puppies are playing with a toy", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a dog is barking and growling", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["a guy speaks with birds chirping in the background", "a stream of water flows as people talk and wind blows"], "sample_ids": ["v5P-ThUCINM", "xBxDz0CFVn0"], "start_seconds": ["400", "30"], "properties": ["background, chirp, bird", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a man is speaking with wind noise in the background "], "question": "which entity is a natural scene", "label": 1}, {"captions": ["music 
plays and repeated slaps accompany human sniveling, then insect buzz", "a horn honks and then loudly blares"], "sample_ids": ["weDbePuc-Xc", "wnpJndXuxLc"], "start_seconds": ["40", "50"], "properties": ["music, slaps, human", "horn, honk, loud"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a dog barks and whimpers", "someone is typing on a computer keyboard"], "sample_ids": ["sShpyu2l4YQ", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["barks, whimpers, dog", "keyboard, type, computer"], "captions_pred_video": ["the puppies are playing with a toy", "how to make money on youtube in spanish"], "captions_pred_audio": ["a dog is barking and growling", "a person is typing on a keyboard"], "question": "which entity is typing", "label": 1}, {"captions": ["a child speaks in closed space", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yW6FWLSLkx4", "wz7N8YRy74I"], "start_seconds": ["40", "30"], "properties": ["child, space, speak", "rooster, crow, background, men"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a person is snoring while sleeping"], "sample_ids": ["xV7Mg1QucSc", "vJrjSeP17yE"], "start_seconds": ["14", "40"], "properties": 
["alarm, ticktocks, laughs", "a person is sleeping, snoring, person"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a person snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a whistling owl calls out repeatedly and insects screech"], "sample_ids": ["ukxt9I7eMMg", "w6RTHR6AeAg"], "start_seconds": ["30", "40"], "properties": ["continuous, woman, speaking", "call, owl, screech"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "an owl hoots and mechanisms operate "], "question": "which entity is a bird?", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a baby cries and a woman speaks"], "sample_ids": ["tDlfY3nmx1A", "tMbMDvT50j8"], "start_seconds": ["160", "12"], "properties": ["applause, laugh, man", "a, cry, woman"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a baby cries and a woman speaks"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["dogs barking and whimpering", "water splashes as an animal walks through"], "sample_ids": ["tIY7qOV3rEM", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["barking, whimpering, dog", "animal, water, splashes"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "water splashes and gurgles as people 
speak"], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp and wind blows", "a girl talking, laughing and sneezing noise"], "sample_ids": ["sxIvBMSavMQ", "y4tPJXBKDig"], "start_seconds": ["210", "20"], "properties": ["birds, chirp, wind", "a, noise, talk"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "footage of the woman wiping her nose with a tissue"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a woman is speaking and coughing with background noise and breathing "], "question": "which entity is talking", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tGcFnX0GHI", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["ring, talk, woman", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["someone whistles briefly", "small dogs yip and bark sharply"], "sample_ids": ["uFoga8sHpiw", "v-wcQf4BDY0"], "start_seconds": ["90", "120"], "properties": 
["sound, duration, pitch", "bark, yip, sharply"], "captions_pred_video": ["footage of a bird in a cage", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a person whistles a song", "a dog barks and growls"], "question": "which entity is louder", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "a telephone rings followed by a woman talking"], "sample_ids": ["vs65y4qmyBE", "tGcFnX0GHI"], "start_seconds": ["340", "0"], "properties": ["engine, run, man", "ring, talk, woman"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["wind blows strongly and a young man speaks", "a car revs and accelerates loudly and men and women chatter among themselves"], "sample_ids": ["vs65y4qmyBE", "y8dSeubCNI"], "start_seconds": ["340", "4"], "properties": ["wind, blows, strongly", "men, women, car"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "an engine revving and people talking in the background"], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["ugHJF0hfYkg", "tDVADusiIoc"], "start_seconds": ["10", "60"], "properties": ["loud, propeller, move", "water, radio, man"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not moving", "label": 1}, {"captions": ["a race car 
approaches quickly and slows down squealing tires", "winds blows roughly as a vehicle races past"], "sample_ids": ["sEprKHm8Sj8", "xjvTpk2Zpr8"], "start_seconds": ["90", "70"], "properties": ["car, tires, slows", "wind, blows, vehicle"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a jet engine roars and wind blows "], "question": "which vehicle is moving faster", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "motors rev and run loudly as a person laughs"], "sample_ids": ["uiItxDsDMFI", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["wood, piece, saw", "motors rev, laugh, loudly"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a saw is being used with background noise ", "a jet engine roars "], "question": "which entity is not a person?", "label": 0}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a vehicle is skidding and squealing tires"], "sample_ids": ["y2bVZ7rz-5M", "soTOh3zYJfY"], "start_seconds": ["280", "40"], "properties": ["motor noise, horn, siren", "vehicle, skid, tires"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "a red car drifting on a winding road with smoke coming out of it"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "birds chirp and objects are moved around"], 
"sample_ids": ["s4Uz1Ffgo04", "yPUYU6t3rwo"], "start_seconds": ["100", "370"], "properties": ["water, rushes, motorcycle", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "insects buzz and a man speaks"], "question": "which entity is more calm", "label": 1}, {"captions": ["water is sprayed across a hard surface", "several insects fly while two men talk"], "sample_ids": ["sQwlkXjQabo", "s-T9OVOiMLo"], "start_seconds": ["10", "330"], "properties": ["water, spray, surface", "several, fly, men"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video of a natural event", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "a woman speaks happily and an animal chirps"], "sample_ids": ["y8WEcpOlT3I", "uWAAAL4CIoc"], "start_seconds": ["40", "0"], "properties": ["wind, speak, buffeting", "a woman, chirps, animal"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and a dog is barking "], "question": "which entity is more likely to be in a zoo", "label": 1}, {"captions": ["a child speaks in closed space", "someone snores nearby"], "sample_ids": ["yW6FWLSLkx4", "spJCm8tD9Zo"], "start_seconds": ["40", "90"], "properties": ["child, space, speak", "someone snores, nearby, someone"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of a man laying on the ground with his mouth open"], 
"captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "vehicles pass by on a roadway"], "sample_ids": ["zdYdyF9-m8U", "tgbONvsP47Y"], "start_seconds": ["7", "0"], "properties": ["wind, crash, shoreline", "pass, vehicle, roadway"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "footage of a fire truck entering a garage"], "captions_pred_audio": ["waves crash and wind blows ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "a duck quacks continuously"], "sample_ids": ["zj2R0XoFr5k", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["airplane, fly, overhead", "quacks, continuously, duck"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["xOZfdgAgJ9o", "x9JovgqUcs"], "start_seconds": ["40", "500"], "properties": ["woman, whimpering, speaking", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man speaks and types on a keyboard"], "question": "which entity is speaking", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "several insects fly while two men talk"], "sample_ids": ["w5W5Kqtc8E", "s-T9OVOiMLo"], 
"start_seconds": ["100", "330"], "properties": ["wind, blow, vehicle", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about flying insects?", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a person speaks briefly"], "sample_ids": ["zTLVJCo4WEE", "zOZleIRqZm4"], "start_seconds": ["30", "80"], "properties": ["a, crickets, sing", "person, talk, brief"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking with crickets chirping in the background"], "question": "which entity is a person talking?", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "a vehicle engine accelerating then running on idle"], "sample_ids": ["s4Uz1Ffgo04", "vYkA3cfXp5Q"], "start_seconds": ["100", "30"], "properties": ["water, rushes, vehicle", "engine, accelerate, idle"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "an engine is idling"], "question": "which entity is a vehicle", "label": 1}, {"captions": ["dogs barking and whimpering", "someone whistles a tune"], "sample_ids": ["tIY7qOV3rEM", "sIXTftIuUgw"], "start_seconds": ["0", "90"], "properties": ["barking, whimpering, dog", "someone, tune, whistle"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a person whistling a song"], "question": "which entity is a human", "label": 1}, {"captions": 
["a person whistles and clicks a mouse", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zCrAfDfv6-A", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["person, mouse, click", "applause, audience, yells"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a person whistles a song", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["y2ZBGpgbhHM", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["animal, growl, bird", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "water flows and trickles"], "sample_ids": ["vf44CgrjT0A", "tB7hWb9gTuQ"], "start_seconds": ["20", "30"], "properties": ["loud, long, person", "water, flow, trickle"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a loud burp", "water is splashing and gurgling"], "question": "which entity is flowing", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a man speaks as a car is passing by"], "sample_ids": ["vJvryTwuAV8", "sK4u5T8hW78"], "start_seconds": ["16", "30"], "properties": ["audience, cheer, man", "a, car, pass"], "captions_pred_video": ["a crowd of people are gathered in a mall, 
watching a man take a selfie in front of the crowd", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a man speaking to an audience?", "label": 0}, {"captions": ["music plays followed by gunshots and then an explosion", "a man speaks over intermittent keyboard taps"], "sample_ids": ["xKB8O8LTs6s", "tw76HGONaKg"], "start_seconds": ["70", "570"], "properties": ["music, gunshots, explosion", "audio, man, keyboard"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man speaks and types on a 
computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "a machine beeps continuously"], "sample_ids": ["wqUmIEzuNz4", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["frog, bird, vocalize", "beeps, machine, continuously"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", null], "captions_pred_audio": ["a cat meows and rustles", "a beeping sound is being made "], "question": "which entity is not a frog?", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vfYTJq7nU", "wqZ135Ssz0"], "start_seconds": ["130", "60"], "properties": ["ducks, quack, man", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a duck quacks and a woman speaks", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more birds", "label": 1}, {"captions": ["a toilet flushes and water drains", "a person sneezes followed by another person speaking"], "sample_ids": ["sfAvvZwdLCY", "t8CV69hcvF0"], "start_seconds": ["20", "210"], "properties": ["water drains, flushes, water", "person, sneeze, follow"], "captions_pred_video": ["footage of the toilet in the bathroom", "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a toilet is flushed", "a woman sneezes and speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a machine beeps continuously"], "sample_ids": ["ugHJF0hfYkg", "y682ml90jGw"], "start_seconds": ["10", "11"], "properties": ["loud, intense, propeller", "beeps, machine, continuously"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a beeping sound is being made "], 
"question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["sG7TyPnFDR0", "wyllXV6PjKo"], "start_seconds": ["180", "30"], "properties": ["beeps, machine, smoke alarm", "a baby, a woman, a man"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", null], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a woman speaks and a baby cries"], "question": "which entity has a baby?", "label": 1}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wP8ZKrlx3oA", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["rain, storm, thunder", "female, spraying, scream"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a dog barks and whimpers", "a drill drills through something then people begin laughing"], "sample_ids": ["sShpyu2l4YQ", "tEE3MpBt1sg"], "start_seconds": ["0", "50"], "properties": ["barks, whimpers, dog", "drill, something, laugh"], "captions_pred_video": ["the puppies are playing with a toy", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a dog is barking and growling", "people are laughing breathing and speaking with background noise "], "question": "which entity is more likely to be a joke", "label": 1}, {"captions": ["distant humming of an engine", "an engine runs loudly"], "sample_ids": ["yVPZ2MNWpms", "vqZuVbG6-HI"], "start_seconds": ["0", "130"], "properties": ["sound, distance, engine", 
"loud, engine, run"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a car is driving by on the road ", "a lawn mower is running and men are speaking "], "question": "which engine is louder", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tGcFnX0GHI", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["ring, talk, woman", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a woman is speaking while food is frying in the background"], "question": "which entity is about a woman talking?", "label": 0}, {"captions": ["an engine starts and increases in power", "a stream of water runs briefly"], "sample_ids": ["zjTG0gaGCUI", "x-PeY8Yb8M4"], "start_seconds": ["80", "300"], "properties": ["power, increase, engine", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wztCSUxOf8", "uEU-Hg5MTN8"], "start_seconds": ["130", "27"], "properties": ["a crowd, yells, applauds", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be at a sporting event", "label": 0}, {"captions": ["a man speaks while water drains", "people converse as a motor runs and air brakes hiss"], "sample_ids": 
["vSeGhaZt-aI", "zFjIWfSD-4"], "start_seconds": ["50", "410"], "properties": ["water, drain, man", "People, motor, brakes"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "vehicles pass by on a roadway"], "sample_ids": ["vJ7JPEFhyLA", "tgbONvsP47Y"], "start_seconds": ["16", "0"], "properties": ["three men, wind, flow", "pass, vehicle, roadway"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a car is driving on the road "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["ticking continues without interruption", "water flows as men speak and yell"], "sample_ids": ["v-g-j2uTByM", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["ticking, continuous, clock", "water, flow, men"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a clock is ticking loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not continuous", "label": 1}, {"captions": ["a car accelerates and wind blows", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["u0TrcHhkPQ", "wDVMhEdTiVw"], "start_seconds": ["20", "30"], "properties": ["accelerates, wind, blows", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a gun is fired followed by splashing 
and a person sneezing"], "question": "which entity is more likely to be in a car", "label": 0}, {"captions": ["the revving of an engine throttle followed by a man speaking", "vehicles pass by on a roadway"], "sample_ids": ["tezvROoo4bs", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["audio, throttle, speaking", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a busy city street with cars parked on both sides of the road", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "a car is driving on the road "], "question": "which entity is a video", "label": 1}, {"captions": ["a door opens and closes", "paper is crumpling consistently"], "sample_ids": ["vBHyYJ8pL0", "v5cSxLaHADY"], "start_seconds": ["2", "0"], "properties": ["open, close, door", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "a person speaks briefly"], "sample_ids": ["xvDdE3zNf8Y", "zOZleIRqZm4"], "start_seconds": ["120", "80"], "properties": ["a, female, speaks", "person, talk, brief"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking with crickets chirping in the background"], "question": "which entity is talking", "label": 1}, {"captions": ["water flows as men speak and yell", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["vJ7JPEFhyLA", "ziUT9IFTkjg"], "start_seconds": ["16", "10"], "properties": ["water, flow, men", "background, birds, rustling"], 
"captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a toilet flushes and a female speaks"], "sample_ids": ["vcmWSmvti8", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["music, man, fire", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a toilet flushes and a man speaks"], "question": "which entity is about a toilet?", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "a telephone rings followed by a woman talking"], "sample_ids": ["s6DESzUTGjY", "tGcFnX0GHI"], "start_seconds": ["16", "0"], "properties": ["wind, laugh, woman", "ring, talk, woman"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", null], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a dial tone sounds followed by a woman speaking"], 
"question": "which woman is talking", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vfYTJq7nU", "uZesmtKZGSw"], "start_seconds": ["130", "250"], "properties": ["ducks, quack, man", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an animal quacks rapidly", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vh30P49Po6s", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["animal, quacks, rapidly", "airplane, boy, fly"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a duck is quacking loudly", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["slZLHwNbbt4", "y2bVZ7rz-5M"], "start_seconds": ["300", "280"], "properties": ["train, horn, sound", "motor noise, horn, siren"], "captions_pred_video": ["footage of 
a train coming down the tracks on a sunny day", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a warning device", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "people speak as gunfire rings out"], "sample_ids": ["xO-Q2BlIIPU", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["two men, exclamation, speak", "gunfire, ring, speak"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a gun is fired"], "question": "which entity shows a man speaking to another man?", "label": 0}, {"captions": ["an adult speaks and is typing on a computer keyboard", "a man speaks as water trickles down a stream"], "sample_ids": ["x9JovgqUcs", "sapQIQUhFc"], "start_seconds": ["500", "280"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "water, stream, trickles"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a keyboard", "a man is speaking and a stream is flowing in the background "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a frog croaks as other frogs croak in the background"], "sample_ids": ["sEprKHm8Sj8", "yswmmRZFItk"], "start_seconds": ["90", "0"], "properties": ["car, tires, slows", "background, frog, croak"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 
2054 2055 2056 2057 2058 2059 2060 2", "a close up of a frog in the water"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a frog is croaking"], "question": "which entity is a croaking animal?", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vXlk0lIQBFo", "zl9Dqx-j7q4"], "start_seconds": ["470", "6"], "properties": ["wind, speak, vocalize", "engine, laugh, loud"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage of a man driving a car in the dark"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "winds blows roughly as a vehicle races past"], "sample_ids": ["tiDFTC-5vU", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["male, duck, laugh", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "water running down a sink while a man is talking"], "sample_ids": ["tjmoSi330GM", "vSeGhaZt-aI"], "start_seconds": ["23", "50"], "properties": ["speed, water, boat", "water, sink, talk"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is stationary", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "people speak 
as gunfire rings out"], "sample_ids": ["x5cuQjOdM3E", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["cat, talk, meow", "gunfire, ring, speak"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["a small engine spits as it runs", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sZvwOuuPGP0", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["spits, engine, runs", "a woman, something, fried"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a medium engine is running ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "a woman speaks happily and an animal chirps"], "sample_ids": ["y8WEcpOlT3I", "uWAAAL4CIoc"], "start_seconds": ["40", "0"], "properties": ["harsh, wind, blows", "a woman, chirps, animal"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and a dog is barking "], "question": "which entity is more cheerful", "label": 1}, {"captions": ["birds coo incessantly", "water is sprayed across a hard surface"], "sample_ids": ["yZrFNS7GFBQ", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["coo, bird, incessant", "water, spray, surface"], "captions_pred_video": ["of the bird in the cage", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["an owl hoots in the background ", "spraying followed by silence"], "question": "which entity 
is a liquid", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "someone is typing on a computer keyboard"], "sample_ids": ["xvDdE3zNf8Y", "v0x1odnXtP0"], "start_seconds": ["120", "210"], "properties": ["a, female, speaks", "keyboard, type, computer"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman speaks and crumples paper", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "a man speaking with light rustling"], "sample_ids": ["vZAw4apG0Es", "zOZleIRqZm4"], "start_seconds": ["30", "80"], "properties": ["people, clock, converse", "light, rustling, man"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking with crickets chirping in the background"], "question": "which entity is a man speaking with light rustling?", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["w5W5Kqtc8E", "w5W5Kqtc8E"], "start_seconds": ["100", "100"], "properties": ["water, splashes, motorboat", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a person speaks briefly", "tapping occurs then a baby cries"], "sample_ids": ["zOZleIRqZm4", "wIJK3-5y0kA"], "start_seconds": ["80", "30"], "properties": ["person, talk, brief", "a, cry, baby"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of a baby playing with a cat 
in a dark room"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a baby cries and a woman speaks"], "question": "which entity is a person talking?", "label": 0}, {"captions": ["loud clanking and banging with brief male speech", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["sWZzXuWYY", "xfaoyyzw2WU"], "start_seconds": ["420", "180"], "properties": ["male, speech, banging", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "multiple people speak and children yell while water gurgles"], "sample_ids": ["uWPRNLnpy7Y", "vb1fPSDI4c"], "start_seconds": ["10", "30"], "properties": ["accelerate, laugh, vehicle", "multiple, people, yell"], "captions_pred_video": ["is taken from a car driving down the street", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "a duck quacks continuously"], "sample_ids": ["sjlVMgdGSK0", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["car, revving, loudly", "quacks, continuously, duck"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "water is sprayed across a hard surface"], "sample_ids": ["xyx6eNVEYRY", "sQwlkXjQabo"], "start_seconds": ["380", "10"], 
"properties": ["loud, engine, muffles", "water, spray, surface"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "a race car approaches quickly and slows down squealing tires"], "sample_ids": ["yZrFNS7GFBQ", "sEprKHm8Sj8"], "start_seconds": ["30", "90"], "properties": ["pigeon, buzzes, insect", "car, tires, slows"], "captions_pred_video": ["of the bird in the cage", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["an owl hoots in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "dishes cling together then a man begins to speak"], "sample_ids": ["s4Uz1Ffgo04", "sQGXqGcwOTc"], "start_seconds": ["100", "3"], "properties": ["water, rushes, motorcycle", "cling, speak, dishes"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a person snores loudly multiple times at a close distance"], "sample_ids": ["weDbePuc-Xc", "sSMl2vc3ek"], "start_seconds": ["40", "20"], "properties": 
["cartoon character, music, vocalize", "loud, multiple, distance"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", null], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "a horn rings out as a machine runs by"], "sample_ids": ["wqADXCzngMw", "slZLHwNbbt4"], "start_seconds": ["340", "300"], "properties": ["engine, idle, man", "a, horn, run"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a goat screams and people speak in the background", "wind blowing followed by a zoom"], "sample_ids": ["xC8kbrKJmco", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["background, goat, scream", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a goat is bleating ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more calm", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "a man speaks followed by another man speaking outside"], "sample_ids": ["yeFvk9x0wWI", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["chirp, twitter, clatter", "two men, speak, follow"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a human 
speaking?", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "wind blows as people chatter quietly"], "sample_ids": ["vlS6YMeWAPo", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["noise, bleat, call", "wind, chatter, people"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "footage is blurry and out of focus"], "captions_pred_audio": ["a goat bleats and birds chirp", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a car speeds away loudly followed by a car revving loudly and driving away while outside"], "sample_ids": ["uWPRNLnpy7Y", "sjlVMgdGSK0"], "start_seconds": ["10", "30"], "properties": ["accelerate, laugh, vehicle", "car, revving, loudly"], "captions_pred_video": ["is taken from a car driving down the street", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a car accelerates and revs its engine "], "question": "which vehicle is revving loudly", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "a propeller moves loudly nearby"], "sample_ids": ["sapQIQUhFc", "ugHJF0hfYkg"], "start_seconds": ["280", "10"], "properties": ["liquid, flow, distance", "loud, propeller, move"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a helicopter is flying overhead "], "question": "which entity is moving", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["w2bYrCVLT60", "zl9Dqx-j7q4"], "start_seconds": ["120", "6"], "properties": ["ducks, speak, quack", "engine, laugh, loud"], "captions_pred_video": ["of the ducks drinking 
from a pink pool in the grass", "footage of a man driving a car in the dark"], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["uYT5gxnyMWM", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["a, scream, girl", "animal, grunts, snorts"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a baby is crying"], "question": "which entity is about a girl speaking followed by a scream?", "label": 0}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vqZuVbG6-HI", "tdWhHV3X25Q"], "start_seconds": ["130", "60"], "properties": ["background, male, female", "applause, audience, yells"], "captions_pred_video": ["footage is blurry because it's raining outside", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man is speaking and a crowd is clapping"], "question": "which entity has more people", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["su6FAOcOA8c", "yDoT73BWsdA"], "start_seconds": ["4", "10"], "properties": ["engine, idle, woman", "engine, revs, vehicle"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a race car accelerates and revs 
its engine "], "question": "which engine is revving", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["wnpJndXuxLc", "wDVMhEdTiVw"], "start_seconds": ["50", "30"], "properties": ["blows, vehicle, train", "gun, shoot, water"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["wind blowing followed by a zoom", "roadway noise occurs and a truck accelerates"], "sample_ids": ["vr8ZXjEBhMQ", "tgbONvsP47Y"], "start_seconds": ["150", "0"], "properties": ["wind, blow, zoom", "noise, truck, accelerate"], "captions_pred_video": ["is taken from a motorcycle's point of view", "footage of a fire truck entering a garage"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a car is driving on the road "], "question": "which is not a zoom", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "water is sprayed across a hard surface"], "sample_ids": ["un9VQlzgZM", "sQwlkXjQabo"], "start_seconds": ["5", "10"], "properties": ["females, talk, laugh", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "wind blows as people chatter quietly"], "sample_ids": ["xvDdE3zNf8Y", "xBxDz0CFVn0"], "start_seconds": ["120", "30"], "properties": ["a, female, speaks", "wind, chatter, people"], 
"captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a stream of water runs briefly"], "sample_ids": ["t25U-v4k4ts", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["a, chirps, bird", "stream, water, run"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["small dogs yip and bark sharply", "water flows as men speak and yell"], "sample_ids": ["v-wcQf4BDY0", "vJ7JPEFhyLA"], "start_seconds": ["120", "16"], "properties": ["bark, yip, sharply", "water, flow, men"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a dog barks and growls", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["t69a8aRKhmc", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["a, b, c", "rooster, crow, background, men"], "captions_pred_video": ["footage is blurry and out of focus", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people speaking", 
"label": 1}, {"captions": ["a man rubs two objects together then speaks", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["vveS8HT7Uog", "w34HjHr6gAY"], "start_seconds": ["100", "30"], "properties": ["a man, objects, speak", "beeps, hit, woman"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "a toilet flushes and a female speaks"], "sample_ids": ["w2JXXIAdUdg", "yaln9y8I7ms"], "start_seconds": ["10", "230"], "properties": ["snoring, distance, person", "female, flushes, toilet"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "footage is blurry and out of focus"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a toilet flushes and a man speaks"], "question": "which entity is a toilet?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a stream of water runs briefly"], "sample_ids": ["vimzuGQvdcU", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["a, man, yells", "stream, water, run"], "captions_pred_video": ["a group of people are rafting down a river", "a man sitting on a rock in the 
middle of a river"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["s4Uz1Ffgo04", "zFjIWfSD-4"], "start_seconds": ["100", "410"], "properties": ["water, rushes, vehicle", "People, motor, brakes"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a vehicle zooming past?", "label": 0}, {"captions": ["a wooden clack accompanies nearby chirping birds", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yeFvk9x0wWI", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["clack, bird, chirp", "a woman, a television program, a bird"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a woman is speaking and a dog is whimpering"], "question": "which entity has more birds", "label": 1}, {"captions": ["a clock ticktocks briefly", "wind blows as people chatter quietly"], "sample_ids": ["u7C-AEBQM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks briefly", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a vehicle engine revs as 
the vehicle passes"], "sample_ids": ["yajyRTUQk3U", "yDoT73BWsdA"], "start_seconds": ["400", "10"], "properties": ["a woman, something, fried", "engine, revs, vehicle"], "captions_pred_video": ["- a woman cooking in the kitchen", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["an electronic device bleeps once", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tHJ6JSa8Y4", "w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["bleeps, electronic, device", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a clock is ticking and beeping", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a horn honks followed by a loud continuous buzzing while men speak"], "sample_ids": ["spYNpeN7rPY", "wsHBIgzs9Fs"], "start_seconds": ["1", "50"], "properties": ["a clock, ticktock, man", "horn, continuous, buzzing"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "shows a motorcycle riding down a country road with a motorcycle in the foreground"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a car accelerates and revs its engine while a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a jet engine spools up and takes off", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vBslzh7saPw", "vbZ-0lGPneg"], "start_seconds": ["90", "30"], "properties": ["engine, spools, takes", "a woman, a television program, a bird"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a moving object", "label": 0}, {"captions": ["a person is whistling", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sIXTftIuUgw", "tdWhHV3X25Q"], "start_seconds": ["90", 
"60"], "properties": ["person, whistling, person", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a person whistling a song", "a man is speaking and a crowd is clapping"], "question": "which person is more likely to be a performer", "label": 1}, {"captions": ["an electronic device bleeps once", "a clock ticktocks"], "sample_ids": ["tHJ6JSa8Y4", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["bleeps, electronic, device", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a clock is ticking and beeping", "a clock is ticking loudly"], "question": "which entity is a clock", "label": 1}, {"captions": ["a toilet flushes and water drains", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sfAvvZwdLCY", "vfYTJq7nU"], "start_seconds": ["20", "130"], "properties": ["water drains, flushes, water", "rustling, ducks, quack"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a duck quacks and a woman speaks"], "question": "which entity is about water?", "label": 0}, {"captions": ["popping and crackling repeats as men yell and laugh", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["rqu8iB22IY", "tdWhHV3X25Q"], "start_seconds": ["5", "60"], "properties": ["sound, repeats, laugh", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["rustling with distant murmuring", "some men talk among st themselves as 
cars speed and race loudly"], "sample_ids": ["wnNNcxAPwGQ", "uZesmtKZGSw"], "start_seconds": ["0", "250"], "properties": ["sound, distance, rustling", "men, talk, cars"], "captions_pred_video": ["footage of a yellow truck doing a burnout on a race track", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a crowd of people are talking and laughing while a skateboard rolls by ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks as water trickles down a stream", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sapQIQUhFc", "xKB8O8LTs6s"], "start_seconds": ["280", "70"], "properties": ["water, stream, trickles", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["heavy rain splashes as it falls", "water splashes as an animal walks through"], "sample_ids": ["wP8ZKrlx3oA", "w1ir-sZ3Im8"], "start_seconds": ["40", "90"], "properties": ["fall, rain, splash", "animal, water, splashes"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of a group of people 
riding horses through a river"], "captions_pred_audio": ["a heavy rain is falling on a surface", "water splashes and gurgles as people speak"], "question": "which entity is more likely to cause water to splash", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "water pouring and bubbling"], "sample_ids": ["w5W5Kqtc8E", "uyRfq-jKPpo"], "start_seconds": ["100", "50"], "properties": ["wind, engine, scream", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "water pouring and bubbling"], "sample_ids": ["sDSppXIlJrs", "uyRfq-jKPpo"], "start_seconds": ["27", "50"], "properties": ["microphone, water, wind", "water, bubbles, pouring"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to 
do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["the wind is blowing and water is splashing", "water is running from a faucet"], "question": "which entity is more likely to be found in a bathroom", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "a clock ticktocks"], "sample_ids": ["zdYdyF9-m8U", "v-g-j2uTByM"], "start_seconds": ["7", "30"], "properties": ["wind, crash, shoreline", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["waves crash and wind blows ", "a clock is ticking loudly"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["a man speaks as crickets sing", "birds chirp and pigeons vocalize while walking around"], "sample_ids": ["ryFDPxgDOGc", "wIvYjuR3nrg"], "start_seconds": ["570", "9"], "properties": ["a, crickets, sing", "birds, pigeons, vocalize"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "footage of a pigeon sitting on a roof with trees in the background"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "birds are chirping and cooing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a toilet flushes and water drains", "rain falls on a surface as men speak and music plays"], "sample_ids": ["sfAvvZwdLCY", "w0xsN8X18Y"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "music, surface, 
rain"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while a motorboat is moving in the background "], "question": "which entity is a video of a toilet flushing and water draining?", "label": 0}, {"captions": ["a man speaks, then dials a rotary telephone", "people speak as gunfire rings out"], "sample_ids": ["tK4VlLsNxak", "wqTCwqVRDlk"], "start_seconds": ["120", "80"], "properties": ["a, dial, telephone", "gunfire, ring, speak"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "a car speeding up in the distance"], "sample_ids": ["xjhAnI2q6hM", "u0TrcHhkPQ"], "start_seconds": ["6", "20"], "properties": ["wind, blow, loudly", "distance, car, speed"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", null], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "an engine starts and increases in power"], "sample_ids": ["wP8ZKrlx3oA", "zjTG0gaGCUI"], "start_seconds": ["40", "80"], "properties": ["rain, storm, thunder", "power, increase, engine"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", null], "captions_pred_audio": ["a heavy rain is falling on a surface", "a jet engine roars as wind blows "], "question": "which entity is more powerful", "label": 
1}, {"captions": ["a woman speaks happily and an animal chirps", "a telephone rings followed by a woman talking"], "sample_ids": ["uWAAAL4CIoc", "tGcFnX0GHI"], "start_seconds": ["0", "0"], "properties": ["a woman, chirps, animal", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a motorcycle engine is idling", "a car speeding up in the distance"], "sample_ids": ["vZAqdHZ81yA", "u0TrcHhkPQ"], "start_seconds": ["180", "20"], "properties": ["engine, motorcycle, idling", "distance, car, speed"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", null], "captions_pred_audio": ["an engine is idling loudly", "a race car accelerates and revs its engine "], "question": "which object is moving faster", "label": 0}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sxYkFKFIZD0", "zj2R0XoFr5k"], "start_seconds": ["20", "50"], "properties": ["screech, man, door", "airplane, boy, fly"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": 
["multiple birds chirp and an animal grunts", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["tDlysoZiA1I", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["animal, grunt, multiple", "animal, grunts, snorts"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking and an animal grunts and snorts?", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "an insect buzzes around continuously"], "sample_ids": ["yJ0TePmaOo", "v25l1jef3JY"], "start_seconds": ["390", "0"], "properties": ["two hard objects, man, speak", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a baby laugh at a sputter", "a train horn blows as it passes by"], "sample_ids": ["sLUnaPT5gM8", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["laugh, sputter, baby", "horn, blows, train"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by 
mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["water pouring and bubbling", "wind blowing followed by a zoom"], "sample_ids": ["uyRfq-jKPpo", "vr8ZXjEBhMQ"], "start_seconds": ["50", "150"], "properties": ["water, bubbles, pouring", "wind, blow, zoom"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["water is running from a faucet", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to be a natural phenomenon", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sQGXqGcwOTc", "xBxDz0CFVn0"], "start_seconds": ["3", "30"], "properties": ["cling, speak, dishes", "stream, water, flow"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage is blurry and out of focus"], 
"captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "some men converse over an engine running"], "sample_ids": ["wAAkbZToh8", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["burp, laugh, speak", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man burps and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation?", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ukxt9I7eMMg", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["continuous, woman, speaking", "female, spraying, scream"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking towards the end?", "label": 0}, {"captions": ["a bird is chirping and tweeting a bird song", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["wPz6QRAkEb4", "tDlysoZiA1I"], "start_seconds": ["60", "0"], "properties": ["chirps, tweets, song", "animal, grunts, chirps"], "captions_pred_video": ["a bird in a cage on top of a pole", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["birds are chirping in the background ", "birds are chirping and a rooster is crowing "], "question": "which entity is a bird", "label": 0}, {"captions": ["a man speaks as horns blow", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["tHyNqRyK34A", "y8WEcpOlT3I"], "start_seconds": 
["24", "40"], "properties": ["a, man, speaks", "harsh, wind, blows"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity has a man speaking as horns blow?", "label": 0}, {"captions": ["a clock alarm sounds and gears turn", "a toilet flushes and a female speaks"], "sample_ids": ["w2M4i1mklOA", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["alarm, gears, turn", "female, flushes, toilet"], "captions_pred_video": ["footage of an antique clock", "footage is blurry and out of focus"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "water is sprayed across a hard surface"], "sample_ids": ["sSMl2vc3ek", "sQwlkXjQabo"], "start_seconds": ["20", "10"], "properties": ["loud, multiple, distance", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a person snoring loudly", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["children speak as a female ask them questions", "a woman speaks as she rubs two objects together"], "sample_ids": ["wEBlkGWVWwE", "vzxHnu-SFEw"], "start_seconds": ["260", "80"], "properties": ["female, speak, questions", "two objects, woman, speak"], "captions_pred_video": ["shows a person writing on the whiteboard", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "a horn blasts loudly as a train passes"], "sample_ids": ["vs65y4qmyBE", "zsLxS-uLJTw"], "start_seconds": ["340", "20"], "properties": ["wind, blows, strongly", "horn, blast, train"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage of the train on the tracks at sunrise or sunset"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a train blows its horn and moves on the tracks "], "question": "which is louder", "label": 0}, {"captions": ["continuous sneezing together with speech", "water flows and trickles"], "sample_ids": ["x4dZyf9Gbj0", "tB7hWb9gTuQ"], "start_seconds": ["130", "30"], "properties": ["continuous, sneeze, speech", "water, flow, trickle"], "captions_pred_video": ["footage is blurry and out of focus", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman sneezes and speaks", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a 
beep repeats multiple times", "a bird is chirping and tweeting a bird song"], "sample_ids": ["y682ml90jGw", "wPz6QRAkEb4"], "start_seconds": ["11", "60"], "properties": ["beep, repeat, multiple", "chirps, tweets, song"], "captions_pred_video": [null, "a bird in a cage on top of a pole"], "captions_pred_audio": ["a beeping sound is being made ", "birds are chirping in the background "], "question": "which entity is a song", "label": 1}, {"captions": ["a man speaks as horns blow", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tHyNqRyK34A", "vb1fPSDI4c"], "start_seconds": ["24", "30"], "properties": ["a, man, speaks", "multiple, people, yell"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", null], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["paper is crumpling consistently", "an engine idles consistently before sputtering some"], "sample_ids": ["v5cSxLaHADY", "rwTERCUno"], "start_seconds": ["0", "90"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "engine, idle, sputter"], "captions_pred_video": ["footage of the person holding a pair of scissors", null], "captions_pred_audio": ["paper is crumpled and crinkled", "an engine is idling and vibrating"], "question": "which entity is consistent", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["y2ZBGpgbhHM", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["animal, growl, bird", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["birds chirping and a dog panting", "a woman is speaking and a subway train is moving "], "question": "which entity is 
stationary", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "a toilet flushes and water drains"], "sample_ids": ["wnpJndXuxLc", "sfAvvZwdLCY"], "start_seconds": ["50", "20"], "properties": ["beeps, loud, whistle", "water drains, flushes, water"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a person snores loudly multiple times at a close distance"], "sample_ids": ["u2f5NpsoHBg", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["person, laugh, clap", "loud, multiple, distance"], "captions_pred_video": ["is being projected on a screen at the front of the stage", null], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a person snoring loudly"], "question": "which person is speaking", "label": 0}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "winds blows roughly as a vehicle races past"], "sample_ids": ["wy1eKjR7KC0", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["people, talk, distance", "wind, blows, vehicle"], "captions_pred_video": ["two police officers riding motorcycles down the street", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["an emergency siren wails as it passes", "an insect buzzes around continuously"], "sample_ids": ["vGj1XLJvNrw", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["wails, wails, pass", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a 
police car driving down a city street", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a fly is buzzing around a microphone "], "question": "which entity buzzes around continuously", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vddP56-ogds", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["water, flow, laugh", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a woman speaks and dog vocalizes", "a consistent ticking pattern"], "sample_ids": ["uWAAAL4CIoc", "sCeWURVHfOM"], "start_seconds": ["0", "30"], "properties": ["a, dog, vocalize", "ticking, pattern, clock"], "captions_pred_video": [null, "- a close-up view of the clock's inner workings"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "ticking of a clock"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp as a train approaches", "a car accelerates and wind blows"], "sample_ids": ["xM4joTqDVp4", "u0TrcHhkPQ"], "start_seconds": ["160", "20"], "properties": ["bird, chirp, train", "accelerates, wind, blows"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", null], "captions_pred_audio": ["birds are chirping and a train is moving ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a horse runs while two women talk", "wind blows as people chatter quietly"], "sample_ids": ["sdvI1mHAsc", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["two 
women, horse, run", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["uYT5gxnyMWM", "zFjIWfSD-4"], "start_seconds": ["50", "410"], "properties": ["a, scream, girl", "People, motor, brakes"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a girl speaking followed by a scream and more girls talking?", "label": 0}, {"captions": ["an engine runs loudly", "sirens ring and approach with humming of distant traffic"], "sample_ids": ["vqZuVbG6-HI", "xERFUeZONz8"], "start_seconds": ["130", "0"], "properties": ["loud, engine, run", "ring, approach, traffic"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage is blurry due to camera shake or motion blur"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "an emergency vehicle siren blares"], "question": "which entity is a warning", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "someone is typing on a computer keyboard"], "sample_ids": ["y2ZBGpgbhHM", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["birds, tweet, pant", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["birds chirping and a dog panting", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface 
before running tap water", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["wvKpEYswXO0", "zl9Dqx-j7q4"], "start_seconds": ["150", "6"], "properties": ["water, tap, run", "engine, laugh, loud"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wvKpEYswXO0", "su6FAOcOA8c"], "start_seconds": ["150", "4"], "properties": ["sound, water, running", "engine, idle, woman"], "captions_pred_video": ["of the person preparing food in the kitchen", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking softly?", "label": 0}, {"captions": ["an airplane accelerates briefly", "insects humming with a dog barking and small goat bleating"], "sample_ids": ["zjTG0gaGCUI", "tIY7qOV3rEM"], "start_seconds": ["80", "0"], "properties": ["accelerates, airplane, briefly", "animal, bark, dog, barking, small, goat, bleating"], "captions_pred_video": [null, "a dog is standing in the middle of a dirt road in the woods"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a dog is barking and a cat is meowing"], "question": "which animal is barking", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a toilet flushes and a female speaks"], "sample_ids": ["wtDqrBygTcU", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["man, engine, run", "female, flushes, toilet"], "captions_pred_video": ["shows a person riding on the 
back of a boat as it speeds through the water", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a motor is running", "a toilet flushes and a man speaks"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a person sniffles and then sneezes in the distance", "a clock ticktocks"], "sample_ids": ["uRlbY6aoBU", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["a, distance, sneeze", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is sneezing ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a man speaks, then dials a rotary telephone"], "sample_ids": ["vJvryTwuAV8", "tK4VlLsNxak"], "start_seconds": ["16", "120"], "properties": ["audience, cheer, man", "a, dial, telephone"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a man is speaking and using a sewing machine"], "question": "which man is speaking?", "label": 0}, {"captions": ["some clanking with distant murmuring", "pigeons vocalize and birds chirp"], "sample_ids": ["uMTTDZ2mb4", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["clanking, murmuring, distant", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["an engine starts and increases in power", "paper is crumpling consistently"], "sample_ids": ["zjTG0gaGCUI", "v5cSxLaHADY"], 
"start_seconds": ["80", "0"], "properties": ["power, increase, engine", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a jet engine roars as wind blows ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a steam engine runs and whistles as it passes by"], "sample_ids": ["yZmhM1HcsyE", "se87d6yxEOA"], "start_seconds": ["4", "10"], "properties": ["engine, roar, water", "run, whistle, pass"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "footage of a train passing by a train station with smoke billowing out of the train's smokestack"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a train is moving and blowing its whistle "], "question": "which entity is a steam engine?", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "waves crash against a shoreline and people speak"], "sample_ids": ["sxYkFKFIZD0", "yFB25fqfU8I"], "start_seconds": ["20", "300"], "properties": ["screech, man, door", "wave, crash, shoreline"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": 
["a goat screams and people speak in the background", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xC8kbrKJmco", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["background, goat, scream", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a goat is bleating ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a helicopter engine runs continuously", "an airplane engine runs"], "sample_ids": ["ugHJF0hfYkg", "yVPZ2MNWpms"], "start_seconds": ["10", "0"], "properties": ["engine, running, continuously", "engine, airplane, runs"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car is driving by on the road "], "question": "which entity has a running engine", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "a duck quacks continuously"], "sample_ids": ["uiItxDsDMFI", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["wood, piece, saw", "quacks, continuously, duck"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a saw is being used with background noise ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "a toilet flushes and water drains"], "sample_ids": ["y2ZBGpgbhHM", "sfAvvZwdLCY"], "start_seconds": ["30", "20"], "properties": ["animal, growl, bird", "water drains, flushes, water"], "captions_pred_video": [null, "footage of the toilet in the bathroom"], "captions_pred_audio": ["birds chirping and a dog panting", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["women speak as 
water runs briefly, children call out, and a man speaks", "loud clanking and banging with brief male speech"], "sample_ids": ["uRExseg-0XI", "sWZzXuWYY"], "start_seconds": ["210", "420"], "properties": ["woman, man, water", "male, speech, banging"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", null], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a sewing machine runs and a man speaks"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["people converse as a motor runs and air brakes hiss", "a woman speaks happily and an animal chirps"], "sample_ids": ["zFjIWfSD-4", "uWAAAL4CIoc"], "start_seconds": ["410", "0"], "properties": ["People, motor, brakes", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["skd2PphS6oI", "tdWhHV3X25Q"], "start_seconds": ["190", "60"], "properties": ["ring, bird, vocalize", "applause, audience, yells"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "some men converse over an engine running"], "sample_ids": ["sEprKHm8Sj8", "sCiy7QS1U"], "start_seconds": ["90", "300"], "properties": ["car, tires, slows", "men, converse, engine"], "captions_pred_video": ["rally 2012 2013 
2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is stationary", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "an infant crying as a woman laughs"], "sample_ids": ["u--KhUW8l1Y", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["engine, sound, horn", "a, laugh, infant"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["bees buzz and wind blows", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tMJne1a4AFI", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["bees buzz, wind blows, bees", "two men, woman, birds"], "captions_pred_video": ["a swarm of bees on the ground", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a video of a natural event?", "label": 0}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "wind blowing followed by a zoom"], "sample_ids": ["vfYTJq7nU", "vr8ZXjEBhMQ"], "start_seconds": ["130", "150"], "properties": ["ducks, quack, man", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a duck quacks and a woman speaks", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a man rubs two objects 
together then speaks", "small dogs yip and bark sharply"], "sample_ids": ["vveS8HT7Uog", "v-wcQf4BDY0"], "start_seconds": ["100", "120"], "properties": ["a man, objects, speak", "bark, yip, sharply"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "gunshots ring out, a man yells, and more shots follow"], "sample_ids": ["wvKpEYswXO0", "vKrYfzleLB8"], "start_seconds": ["150", "110"], "properties": ["water, tap, run", "a, ring, gunshots"], "captions_pred_video": ["of the person preparing food in the kitchen", "stock footage of a person holding a gun in their hand"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking with background noise and a cap gun is fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "people applaud and hoot and chat quietly"], "sample_ids": ["w6RTHR6AeAg", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["call, owl, screech", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["people speak as gunfire rings out", "a man speaks as a motor runs in the background"], "sample_ids": ["wqTCwqVRDlk", "xZepNM9qcRA"], "start_seconds": ["80", "30"], "properties": ["gunfire, ring, speak", "background, motor, run"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "a 
close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more quiet", "label": 1}, {"captions": ["dogs barking and whimpering", "people speak as gunfire rings out"], "sample_ids": ["tIY7qOV3rEM", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["barking, whimpering, dog", "gunfire, ring, speak"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "a person speaks briefly"], "sample_ids": ["vfYTJq7nU", "zOZleIRqZm4"], "start_seconds": ["130", "80"], "properties": ["ducks, quack, man", "person, talk, brief"], "captions_pred_video": [null, "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a man is speaking with crickets chirping in the background"], "question": "which entity is a person talking?", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "a stream of water runs briefly"], "sample_ids": ["sSMl2vc3ek", "x-PeY8Yb8M4"], "start_seconds": ["20", "300"], "properties": ["loud, multiple, distance", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a person snoring loudly", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "small dogs yip and bark sharply"], "sample_ids": ["yDoT73BWsdA", "v-wcQf4BDY0"], "start_seconds": ["10", "120"], "properties": ["engine revs, tires squeal, vehicle", "bark, yip, sharply"], 
"captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["a jet engine spools up and takes off", "an insect buzzes around continuously"], "sample_ids": ["vBslzh7saPw", "v25l1jef3JY"], "start_seconds": ["90", "0"], "properties": ["engine, spools, takes", "buzzes, continuously, insect"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a man speaks while water drains", "people applaud and hoot and chat quietly"], "sample_ids": ["vSeGhaZt-aI", "wwyfGO2J4"], "start_seconds": ["50", "90"], "properties": ["water, drain, man", "people, applaud, hoot"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "waves crash against a shoreline and people speak"], "sample_ids": ["wyllXV6PjKo", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["a kid, talk, cry", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["a kid speaks followed by music playing", "people applaud and hoot 
and chat quietly"], "sample_ids": ["tQWGZLItBXk", "wwyfGO2J4"], "start_seconds": ["170", "90"], "properties": ["music, kid, speak", "people, applaud, hoot"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a propeller rotates loudly and intensely"], "sample_ids": ["vlJS7LN2XyM", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["background, clocks, ticking", "loud, intense, propeller"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a ticktock of a clock", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "three men talk while wind blows and some liquid flows"], "sample_ids": ["tDlfY3nmx1A", "vJ7JPEFhyLA"], "start_seconds": ["160", "16"], "properties": ["applause, laugh, man", "three men, wind, flow"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "water pouring and bubbling"], "sample_ids": ["yZrFNS7GFBQ", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["pigeon, buzzes, insect", "water, bubbles, pouring"], "captions_pred_video": 
["of the bird in the cage", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["an owl hoots in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["birds fly and flutter around", "birds chirp quietly and an adult man speaks"], "sample_ids": ["wGKgwOP3h30", "zuua6-5goWw"], "start_seconds": ["30", "30"], "properties": ["fly, flutter, around", "birds, chirp, quiet, man, speaks"], "captions_pred_video": ["of the pigeons in the coop", "in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot"], "captions_pred_audio": ["pigeons coo and flap their wings", "birds are chirping and a man is speaking with background noise "], "question": "which entity is quieter", "label": 1}, 
{"captions": ["a horn blasts loudly as a train passes", "a duck quacks continuously"], "sample_ids": ["zsLxS-uLJTw", "vh30P49Po6s"], "start_seconds": ["20", "30"], "properties": ["horn, blast, train", "quacks, continuously, duck"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 1}, {"captions": ["vehicles pass by on a roadway", "winds blows roughly as a vehicle races past"], "sample_ids": ["tgbONvsP47Y", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["pass, vehicle, roadway", "wind, blows, vehicle"], "captions_pred_video": ["footage of a fire truck entering a garage", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a car is driving on the road ", "a jet engine roars and wind blows "], "question": "which vehicle is moving faster", "label": 0}, {"captions": ["popping and crackling repeats as men yell and laugh", "wind blows as people chatter quietly"], "sample_ids": ["rqu8iB22IY", "xBxDz0CFVn0"], "start_seconds": ["5", "30"], "properties": ["sound, repeats, laugh", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a motorcycle engine is idling", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vZAqdHZ81yA", "vJ7JPEFhyLA"], "start_seconds": ["180", "16"], "properties": ["engine, motorcycle, idling", "three men, wind, flow"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["an 
engine is idling loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "some men converse over an engine running"], "sample_ids": ["ziUT9IFTkjg", "sCiy7QS1U"], "start_seconds": ["10", "300"], "properties": ["background, birds, rustling", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more likely to be in a museum", "label": 1}, {"captions": ["a door opens and birds chirp", "a man speaks as horns blow"], "sample_ids": ["yeFvk9x0wWI", "tHyNqRyK34A"], "start_seconds": ["30", "24"], "properties": ["door, open, birds", "a, man, speaks"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "being taken from inside a vehicle on the street at night"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking and a car is honking with background noise "], "question": "which entity is a person", "label": 1}, {"captions": ["a woman talking as an infant is crying", "several insects fly while two men talk"], "sample_ids": ["tMbMDvT50j8", "s-T9OVOiMLo"], "start_seconds": ["12", "330"], "properties": ["a, talk, infant", "several, fly, men"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a woman talking to an infant?", "label": 0}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a bird chirps followed by a 
door bell ringing that causes a woman to gasp and the music plays"], "sample_ids": ["yYEVLuqEytU", "sU53zg9Jp7s"], "start_seconds": ["40", "380"], "properties": ["animal, pig, background", "a bird chirps, a door bell ringing, a woman gasps"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "a cartoon girl is standing in front of a blue couch"], "captions_pred_audio": ["several sheep bleat and a man speaks", "birds chirp and a doorbell rings with breathing and music in the background "], "question": "which entity has a doorbell?", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a toilet flushes and a female speaks"], "sample_ids": ["zALy31PjDl0", "yaln9y8I7ms"], "start_seconds": ["21", "230"], "properties": ["a man, a vehicle, a horn", "female, flushes, toilet"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a toilet flushes and a man speaks"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "a propeller rotates loudly and intensely"], "sample_ids": ["vbpKkWvfOu4", "ugHJF0hfYkg"], "start_seconds": ["560", "10"], "properties": ["a, man, speaks", "loud, intense, propeller"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a helicopter is flying overhead "], "question": "which is louder", "label": 0}, {"captions": ["an small aircraft engine runs and a boy speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult 
male speaking and duck calls being blown"], "sample_ids": ["xSKJGCItUWE", "vfYTJq7nU"], "start_seconds": ["10", "130"], "properties": ["engine, run, boy", "rustling, ducks, quack"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a duck quacks and a woman speaks"], "question": "which entity is about a boy speaking?", "label": 0}, {"captions": ["animals bleat and cry out and then a woman speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yZp6xizR0yU", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["animal, bleat, cry", "three men, wind, flow"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["water splashes and wind noise is made into a microphone", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sDSppXIlJrs", "wDVMhEdTiVw"], "start_seconds": ["27", "30"], "properties": ["microphone, water, wind", "gun, shoot, water"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a gun?", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["xBxDz0CFVn0", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["wind, chatter, people", "engine, idle, woman"], "captions_pred_video": ["footage is blurry and out of focus", "shows a group of people riding on a crowded subway 
train"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["someone snores nearby", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["spJCm8tD9Zo", "wyllXV6PjKo"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "a baby, a woman, a man"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a woman speaks and a baby cries"], "question": "which entity has more people in it", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "water pouring and bubbling"], "sample_ids": ["xERFUeZONz8", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["ring, approach, traffic", "water, bubbles, pouring"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["an emergency vehicle siren blares", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["running water in a faucet with some clinks", "three men talk while wind blows and 
some liquid flows"], "sample_ids": ["zNRChLjqcU", "vJ7JPEFhyLA"], "start_seconds": ["220", "16"], "properties": ["water, faucet, run", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a liquid flowing?", "label": 1}, {"captions": ["a child yells and another yells", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["vMDHu7Lxcgw", "vlS6YMeWAPo"], "start_seconds": ["410", "40"], "properties": ["two, yell, child", "sheep, baa, birds"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "water flows and trickles"], "sample_ids": ["smGI3C1NZc", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["water, drain, toilet", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a toilet is flushed", "water is splashing and gurgling"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a motorcycle engine works nearby", "water is sprayed across a hard surface"], "sample_ids": ["tOSWIURC-4", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["engine, work, nearby", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a lawn mower is running ", "spraying followed by silence"], "question": "which is a liquid", "label": 1}, {"captions": ["a person snores loudly 
multiple times at a close distance", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sSMl2vc3ek", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["loud, multiple, distance", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "people cheer as a vehicle engine revs"], "sample_ids": ["uPDn2BFTHk", "xjhAnI2q6hM"], "start_seconds": ["140", "6"], "properties": ["lady, laugh, baby", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["xO-Q2BlIIPU", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["two men, exclamation, speak", "two men, woman, birds"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has two men speaking?", "label": 0}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a duck quacks continuously"], "sample_ids": ["ylpYOorfH4o", "vh30P49Po6s"], "start_seconds": ["410", "30"], "properties": ["engine, run, loud", "quacks, continuously, duck"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 
dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "an airplane engine runs"], "sample_ids": ["u--KhUW8l1Y", "yVPZ2MNWpms"], "start_seconds": ["0", "0"], "properties": ["engine, sound, horn", "engine, airplane, runs"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a car is driving by on the road "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a church bell rings several times", "someone snores nearby"], "sample_ids": ["sUVVjE3Ucp8", "spJCm8tD9Zo"], "start_seconds": ["0", "90"], "properties": ["ring, bell, several", "someone snores, nearby, someone"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a church bell is ringing ", "a person is snoring loudly"], "question": "which is louder", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "small dogs yip and bark sharply"], "sample_ids": ["xNMovAf3o50", 
"v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["rain, thunder, music", "bark, yip, sharply"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "a car speeding up in the distance"], "sample_ids": ["tPJvjq9QePY", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["animal, bleat, moo", "distance, car, speed"], "captions_pred_video": ["a dog and a sheep in a barn", null], "captions_pred_audio": ["a baby cries and a man speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["sapQIQUhFc", "yDoT73BWsdA"], "start_seconds": ["280", "10"], "properties": ["water, trickles, flow", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "music plays and animals vocalize as a cartoon character makes sounds"], "sample_ids": ["tOj4tdLRaA", "weDbePuc-Xc"], "start_seconds": ["70", "40"], "properties": ["woman, laugh, baby", "cartoon character, music, vocalize"], "captions_pred_video": [null, "a cartoon frog and a butterfly are sitting on the ground next to each other"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking and birds are chirping with a frog croaking in the background "], "question": "which entity has a baby?", "label": 0}, 
{"captions": ["small dogs yip and bark sharply", "a car accelerates and wind blows"], "sample_ids": ["v-wcQf4BDY0", "u0TrcHhkPQ"], "start_seconds": ["120", "20"], "properties": ["bark, yip, sharply", "accelerates, wind, blows"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", null], "captions_pred_audio": ["a dog barks and growls", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "an airplane engine runs"], "sample_ids": ["vuUVPzd2FXw", "yVPZ2MNWpms"], "start_seconds": ["160", "0"], "properties": ["a, steam, release", "engine, airplane, runs"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a car is driving by on the road "], "question": "which object releases steam", "label": 0}, {"captions": ["a man speaks while video game music plays with some clicking", "a toilet flushes and water drains"], "sample_ids": ["tw76HGONaKg", "sfAvvZwdLCY"], "start_seconds": ["570", "20"], "properties": ["music, click, man", "water drains, flushes, water"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the 
legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["people speak in a closed space", "vehicle engines race around a track as a man commentates"], "sample_ids": ["sTpirNYo8vQ", "sZPuqDgX2V0"], "start_seconds": ["30", "30"], "properties": ["people, space, speak", "commentator, race, track"], "captions_pred_video": ["of a man taking a selfie on a bus", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking and a helicopter is flying overhead "], "question": "which is a video", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a duck quacks continuously"], "sample_ids": ["u--KhUW8l1Y", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["horn, siren, life", "quacks, continuously, duck"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a man speaks as a car is passing by"], "sample_ids": ["xKB8O8LTs6s", "sK4u5T8hW78"], "start_seconds": ["70", "30"], "properties": ["music, radio, gunshots", "a, car, pass"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a radio?", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "two women and a man talk while a kid cries"], "sample_ids": ["vzxHnu-SFEw", "wyllXV6PjKo"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "a kid, talk, cry"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a woman speaks and a baby cries"], "question": "which entity has a kid?", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "an airplane engine runs"], "sample_ids": ["xjhAnI2q6hM", "yVPZ2MNWpms"], "start_seconds": ["6", "0"], "properties": ["wind, blow, loudly", "engine, airplane, runs"], 
"captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a car is driving by on the road "], "question": "which entity is running", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vmrxwuAMb2I", "sLUnaPT5gM8"], "start_seconds": ["40", "0"], "properties": ["a dog, inhales, exhales", "loud, laughter, intermittent"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a dog barks and growls", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a heavy rain falls endlessly", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wP8ZKrlx3oA", "sLUnaPT5gM8"], "start_seconds": ["40", "0"], "properties": ["heavy, rain, fall", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "water runs from a faucet while some men speak and the water runs in the sink"], "sample_ids": ["tIY7qOV3rEM", "vzceMbklWc"], "start_seconds": ["0", "180"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "water, faucet, sink"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking 
and a cat is meowing", "water is running and a man is speaking"], "question": "which entity is a video of a sink?", "label": 1}, {"captions": ["a goat screams and people speak in the background", "dog barking and vehicle engine idling followed shortly by vehicle engine revving"], "sample_ids": ["xC8kbrKJmco", "zY3icUyMdh8"], "start_seconds": ["0", "20"], "properties": ["background, goat, scream", "dog, bark, engine"], "captions_pred_video": [null, "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a goat is bleating ", "a car is driving and dogs are barking and squealing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an aircraft engine runs", "some men converse over an engine running"], "sample_ids": ["yLCORCnd35Q", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["engine, aircraft, runs", "men, converse, engine"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", null], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a telephone rings followed by a woman talking"], "sample_ids": ["yajyRTUQk3U", "tGcFnX0GHI"], "start_seconds": ["400", "0"], "properties": ["noise, woman, speak", "ring, talk, woman"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a dial tone sounds followed by a woman speaking"], "question": "which woman is talking", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "water splashes as an animal walks through"], "sample_ids": ["zcDwZ6W7E3E", "w1ir-sZ3Im8"], "start_seconds": ["180", "90"], "properties": ["man, speak, motorcycles", "animal, water, splashes"], "captions_pred_video": ["2 people riding motorcycles down a 
mountain road with trees lining the sides of the road", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "small dogs yip and bark sharply"], "sample_ids": ["tGcFnX0GHI", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["ring, talk, woman", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a car accelerates and wind blows", "soft movement is accompanied by clocks ticking in the background"], "sample_ids": ["u0TrcHhkPQ", "vlJS7LN2XyM"], "start_seconds": ["20", "30"], "properties": ["accelerates, wind, blows", "background, clocks, ticking"], "captions_pred_video": [null, "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a ticktock of a clock"], "question": "which entity is accompanied by clocks ticking in the background?", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "a small engine idles continuously"], "sample_ids": ["sQGXqGcwOTc", "y5WII6cTH7k"], "start_seconds": ["3", "40"], "properties": ["cling, speak, dishes", "engine, idle, continuously"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of a sewing machine stitching a red and white hat"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "an engine is knocking and vibrating "], "question": "which entity is stationary", 
"label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "an insect buzzes around continuously"], "sample_ids": ["sTpirNYo8vQ", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["a, tone, fast", "buzzes, continuously, insect"], "captions_pred_video": ["of a man taking a selfie on a bus", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["a person speaks over rustling leaves", "a telephone rings followed by a woman talking"], "sample_ids": ["zOZleIRqZm4", "tGcFnX0GHI"], "start_seconds": ["80", "0"], "properties": ["rustling, leaves, person", "ring, talk, woman"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a recording", "label": 1}, {"captions": ["food is frying then a woman speaks", "an aircraft engine runs as wind blows heavily"], "sample_ids": ["ukxt9I7eMMg", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["food, woman, speak", "engine, run, wind"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a jet engine roars and wind blows "], "question": "which entity is a moving object", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "a series of light horn beeps is followed by a loud steam whistle"], "sample_ids": ["w2JXXIAdUdg", "wnpJndXuxLc"], "start_seconds": ["10", "50"], "properties": ["snoring, distance, person", "beeps, loud, whistle"], "captions_pred_video": ["a 
close up shot of a person's mouth with a toothbrush in it", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["t97k0cejSQE", "xfaoyyzw2WU"], "start_seconds": ["250", "180"], "properties": ["bird, chirp, insect", "loud, jet engine, roar"], "captions_pred_video": ["a bee on a purple thistle flower", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vimzuGQvdcU", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["a, man, yells", "rooster, crow, background, men"], "captions_pred_video": ["a group of people are rafting down a river", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a man speaks as crickets sing", "a airplane flies overhead as a woman speaks"], "sample_ids": ["ryFDPxgDOGc", "zj2R0XoFr5k"], "start_seconds": ["570", "50"], "properties": ["a, crickets, sing", "airplane, fly, woman"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman speaks while a 
helicopter flies overhead "], "question": "which entity is a man speaking as crickets sing?", "label": 0}, {"captions": ["an engine runs loudly", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vqZuVbG6-HI", "w5W5Kqtc8E"], "start_seconds": ["130", "100"], "properties": ["loud, engine, run", "wind, blow, vehicle"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "a woman speaks as she rubs two objects together"], "sample_ids": ["yVumC9TGknc", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["humming, clock, birds", "two objects, woman, speak"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a series of beeps and chirps", "a woman is speaking and breathing with mechanisms in the background "], 
"question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["slZLHwNbbt4", "zl9Dqx-j7q4"], "start_seconds": ["300", "6"], "properties": ["a, horn, run", "engine, laugh, loud"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "an airplane engine runs"], "sample_ids": ["sG7TyPnFDR0", "yVPZ2MNWpms"], "start_seconds": ["180", "0"], "properties": ["beeps, machine, smoke alarm", "engine, airplane, runs"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a car is driving by on the road "], "question": "which entity is a machine?", "label": 0}, {"captions": ["birds vocalize and a man speaks", "paper is crumpling consistently"], "sample_ids": ["v0wPrLBI3hg", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["vocalize, bird, speak", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "winds blows roughly as a vehicle races past"], "sample_ids": ["zO-LSSY92ZM", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["liquid, surface, sound", 
"wind, blows, vehicle"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["steam is hissing and hissing", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["vehicles pass by on a roadway", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["tgbONvsP47Y", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["pass, vehicle, roadway", "animal, grunts, snorts"], "captions_pred_video": ["footage of a fire truck entering a garage", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a car is driving on the road ", "a woman is speaking and a baby is crying"], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["someone snores nearby", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["spJCm8tD9Zo", "zl9Dqx-j7q4"], "start_seconds": ["90", "6"], "properties": ["someone snores, nearby, someone", "engine, laugh, loud"], "captions_pred_video": ["of a man laying on the ground with his mouth 
open", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a person is snoring loudly", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "an airplane accelerates briefly"], "sample_ids": ["xERFUeZONz8", "zjTG0gaGCUI"], "start_seconds": ["0", "80"], "properties": ["ring, approach, traffic", "accelerates, airplane, briefly"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", null], "captions_pred_audio": ["an emergency vehicle siren blares", "a jet engine roars as wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["a stream runs then someone speaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["wbHTKEJZyhc", "zFjIWfSD-4"], "start_seconds": ["20", "410"], "properties": ["stream, run, someone", "People, motor, brakes"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees 
and a bridge in the background video footage of a river in autumn with trees and", null], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a stream running?", "label": 0}, {"captions": ["a woman sneezes then speaks", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["x4dZyf9Gbj0", "w34HjHr6gAY"], "start_seconds": ["130", "30"], "properties": ["sneezes, speaks, woman", "beeps, hit, woman"], "captions_pred_video": ["footage is blurry and out of focus", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman sneezes and speaks", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a door opens and birds chirp", "a mechanical buzzing getting louder"], "sample_ids": ["yeFvk9x0wWI", "sEprKHm8Sj8"], "start_seconds": ["30", "90"], "properties": ["door, open, birds", "noise, loud, buzzing"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a race car accelerates and revs its engine "], "question": "which entity is 
a noise", "label": 1}, {"captions": ["continuous snoring", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sLkeqCDJIyw", "tiDFTC-5vU"], "start_seconds": ["120", "30"], "properties": ["loud, snoring, noise", "male, duck, laugh"], "captions_pred_video": [", what is the man doing on the couch? sleeping", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "a man speaks as a car is passing by"], "sample_ids": ["x6ijhqRY38s", "sK4u5T8hW78"], "start_seconds": ["250", "30"], "properties": ["something metal, glass, hit", "a, car, pass"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "water splashes and a door squeaks"], "sample_ids": ["vimzuGQvdcU", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["a, man, yells", "sound, splash, door"], "captions_pred_video": ["a group of people are rafting down a river", null], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a dog barks and taps with background noise "], "question": "which entity has a door squeaking?", "label": 1}, 
{"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "three men talk while wind blows and some liquid flows"], "sample_ids": ["w9lpbUn0hPc", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["male, wind, rustling", "three men, wind, flow"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more wind", "label": 1}, {"captions": ["a toilet flushes and water drains", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["sfAvvZwdLCY", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["water drains, flushes, water", "a woman, laughs, animal"], "captions_pred_video": ["footage of the toilet in the bathroom", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "a man speaks followed by another man speaking outside"], "sample_ids": ["vmrxwuAMb2I", "viuTg1M-dqg"], "start_seconds": ["40", "30"], "properties": ["a dog, inhales, exhales", "two men, speak, follow"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a dog barks and growls", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person speaking to another person?", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "crowd applause while a guy laughs followed by another man speaking"], "sample_ids": ["vZAw4apG0Es", 
"tDlfY3nmx1A"], "start_seconds": ["30", "160"], "properties": ["background, clock, ticktocks", "applause, laugh, man"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a man in a suit and tie is talking to another man in a suit and tie"], "captions_pred_audio": ["a clock is ticking and people are talking", "a crowd is clapping and laughing and a man is speaking "], "question": "which entity has a clock ticktocking in the background?", "label": 0}, {"captions": ["water gurgles, metal squeaks and the water stops", "a car speeding up in the distance"], "sample_ids": ["x4a9YGIw4ok", "u0TrcHhkPQ"], "start_seconds": ["120", "20"], "properties": ["water, gurgles, stops", "distance, car, speed"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and water splashes", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["w5W5Kqtc8E", "y2bVZ7rz-5M"], "start_seconds": ["100", "280"], "properties": ["water, splashes, motorboat", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a warning", "label": 1}, {"captions": ["a woman speaks while water pours", "a clock rings and ticktocks"], "sample_ids": ["wTideSjRFS0", "yaLIJu2U4Y"], "start_seconds": ["30", "30"], "properties": ["water, pours, woman", "ring, ticktock, clock"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": null, "question": "which entity is silent", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "a 
clock ticktocks"], "sample_ids": ["zgUgkpk78xU", "v-g-j2uTByM"], "start_seconds": ["70", "30"], "properties": ["horn, bells, ring", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["people speak and tapping occurs", "water flows and trickles"], "sample_ids": ["tFCUUGdREgA", "tB7hWb9gTuQ"], "start_seconds": ["70", "30"], "properties": ["people, tap, speak", "water, flow, trickle"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "wind blowing followed by a zoom"], "sample_ids": ["sapQIQUhFc", "vr8ZXjEBhMQ"], "start_seconds": ["280", "150"], "properties": ["water, stream, trickles", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a person is whistling", "a speedboat passes quickly on the water"], "sample_ids": ["sIXTftIuUgw", "tjmoSi330GM"], "start_seconds": ["90", "23"], "properties": ["person, whistling, person", 
"speed, water, boat"], "captions_pred_video": [null, "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["a person whistling a song", "a motorboat speeds through water with wind noise "], "question": "which is moving faster", "label": 0}, {"captions": ["a vehicle engine runs and someone speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zF8yoL0rkbI", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["engine, run, someone", "applause, audience, yells"], "captions_pred_video": ["footage of the traffic on the street at night", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "a man speaks as a car is passing by"], "sample_ids": ["vKrYfzleLB8", "sK4u5T8hW78"], "start_seconds": ["110", "30"], "properties": ["a, ring, gunshots", "a, car, pass"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more calm", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "a man speaks over intermittent keyboard taps"], "sample_ids": ["xM4joTqDVp4", "tw76HGONaKg"], 
"start_seconds": ["160", "570"], "properties": ["background, chirp, birds", "audio, man, keyboard"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "wind blows as people chatter quietly"], "sample_ids": ["w2M4i1mklOA", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["alarm, gears, turn", "wind, chatter, people"], "captions_pred_video": ["footage of an antique clock", "footage is blurry and out of focus"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["vK93VuO0yNc", "t97k0cejSQE"], "start_seconds": ["30", "250"], "properties": ["male voice, 
bus, rumble", "sound, chirp, buzz"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "a bee on a purple thistle flower"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a bee buzzes and a woman speaks"], "question": "which entity has a higher pitch", "label": 1}, {"captions": ["a helicopter engine runs continuously", "an adult woman and an adult man speak"], "sample_ids": ["ugHJF0hfYkg", "zTLVJCo4WEE"], "start_seconds": ["10", "30"], "properties": ["engine, running, continuously", "two people, adult, speak"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "- a boy with a rifle aiming at a target"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman speaks and crickets chirp"], "question": "which entity is speaking", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "wind blowing followed by a zoom"], "sample_ids": ["xOZfdgAgJ9o", "vr8ZXjEBhMQ"], "start_seconds": ["40", "150"], "properties": ["woman, whimpering, speaking", "wind, blow, zoom"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["water runs into a sink while men speak", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vzceMbklWc", "zFjIWfSD-4"], "start_seconds": ["180", "410"], "properties": ["water, sink, run", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and a man is speaking", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "a woman 
speaks as she rubs two objects together"], "sample_ids": ["xjvTpk2Zpr8", "vzxHnu-SFEw"], "start_seconds": ["70", "80"], "properties": ["wind, blows, vehicle", "two objects, woman, speak"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["speaking following by laughing and clapping", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["u2f5NpsoHBg", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["person, laugh, clap", "People, motor, brakes"], "captions_pred_video": ["is being projected on a screen at the front of the stage", null], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity shows a person speaking and laughing and clapping?", "label": 0}, {"captions": ["two women and a man talk while a kid cries", "a 
machine engine runs and a man speaks"], "sample_ids": ["wyllXV6PjKo", "vs65y4qmyBE"], "start_seconds": ["30", "340"], "properties": ["a kid, talk, cry", "engine, run, man"], "captions_pred_video": [null, "a car is engulfed in flames on the side of the road"], "captions_pred_audio": ["a woman speaks and a baby cries", "a heavy engine is running and men are speaking "], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wqADXCzngMw", "zj2R0XoFr5k"], "start_seconds": ["340", "50"], "properties": ["engine, idle, man", "airplane, boy, fly"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a plane flying by?", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a car accelerates and wind blows"], "sample_ids": ["sxYkFKFIZD0", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["screech, man, door", "accelerates, wind, blows"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a race car accelerates and revs its engine "], "question": "which car is moving faster", "label": 1}, {"captions": ["music plays and someone speaks 
before gunfire and an explosion occurs", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["xKB8O8LTs6s", "y8WEcpOlT3I"], "start_seconds": ["70", "40"], "properties": ["music, gunfire, explosion", "harsh, wind, blows"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "on how to use a sewing machine youtube"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "a man speaks with another voice speaking in the background"], "sample_ids": ["uJV8NDaHqqk", "u21-Z5gJCB8"], "start_seconds": ["100", "30"], "properties": ["loud, fly, chirp", "background, voice, man"], "captions_pred_video": ["a bee hive in a wooden box", "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking with background noise and breathing sounds "], "question": "which entity is quieter", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a few ducks quack and scamper and a man speaks"], "sample_ids": ["yRx9txMcBl0", "w2bYrCVLT60"], "start_seconds": ["40", "120"], "properties": ["motors, tires, screech", "ducks, speak, quack"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "of the ducks drinking from a pink pool in 
the grass"], "captions_pred_audio": ["a car is revving its engine and skidding ", "ducks are quacking and a man is speaking"], "question": "which entity is a bird?", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "a young woman speaks over spraying and another person yells"], "sample_ids": ["xjvTpk2Zpr8", "uYT5gxnyMWM"], "start_seconds": ["70", "50"], "properties": ["wind, blows, vehicle", "person, spray, yell"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a woman is speaking and a baby is crying"], "question": "which entity is a person speaking over spraying?", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a train horn blows as it passes by"], "sample_ids": ["yRx9txMcBl0", "zVacuqSb4LI"], "start_seconds": ["40", "30"], "properties": ["motors, tires, screech", "horn, blows, train"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train?", "label": 1}, {"captions": ["an insect buzzes around continuously", "birds vocalize and chirp continuously"], "sample_ids": ["v25l1jef3JY", "w1mlz3Pe4fU"], "start_seconds": ["0", "300"], "properties": ["buzzes, continuously, insect", "vocalize, chirp, continuously"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a bird in a cage"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "birds are chirping and singing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a door opens and birds chirp", "a man speaks followed by another man speaking outside"], "sample_ids": ["yeFvk9x0wWI", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["door, open, birds", "two men, speak, follow"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker?", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "a woman and man are speaking"], "sample_ids": ["x9JovgqUcs", "vbpKkWvfOu4"], "start_seconds": ["500", "560"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "two people, speaking, woman, man"], "captions_pred_video": [null, "2012-07-20 17 30 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a woman is speaking and a man is speaking"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["water gurgles, metal squeaks and the water stops", "a stream of water runs briefly"], "sample_ids": ["x4a9YGIw4ok", "x-PeY8Yb8M4"], "start_seconds": ["120", "300"], "properties": ["water, gurgles, stops", "stream, water, run"], "captions_pred_video": ["footage is blurry and out of focus", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a toilet flushes and water splashes", "a car is driving on a wet road "], "question": "which entity has more water", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a clock ticktocks briefly"], "sample_ids": ["uzQnlJXBbOM", "u7C-AEBQM"], "start_seconds": ["50", "30"], "properties": ["ringing, beep, stop", "ticktocks, clock, ticktocks briefly"], "captions_pred_video": ["footage of a person using a cell phone on a table", null], "captions_pred_audio": ["a telephone rings and a man speaks", "a ticktock of a clock"], "question": "which entity is a clock?", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["xyx6eNVEYRY", "xKB8O8LTs6s"], "start_seconds": ["380", "70"], "properties": ["loud, engine, muffles", "music, gunfire, explosion"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "music plays while a woman speaks and gunshots are fired "], "question": "which 
entity is more violent", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["x6ijhqRY38s", "zFjIWfSD-4"], "start_seconds": ["250", "410"], "properties": ["bowl, silverware, man", "People, motor, brakes"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a man moving silverware in a bowl?", "label": 0}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "water pouring and bubbling"], "sample_ids": ["uPDn2BFTHk", "uyRfq-jKPpo"], "start_seconds": ["140", "50"], "properties": ["lady, laugh, baby", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a baby laughs and a woman speaks", "water is running from a faucet"], "question": "which entity is more likely to be in a bath", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["weDbePuc-Xc", 
"w5W5Kqtc8E"], "start_seconds": ["40", "100"], "properties": ["cartoon character, music, vocalize", "wind, blow, vehicle"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", null], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["two frogs croak at each other", "small dogs yip and bark sharply"], "sample_ids": ["zg0X6BnhOLQ", "v-wcQf4BDY0"], "start_seconds": ["410", "120"], "properties": ["two frogs, croak, at each other", "bark, yip, sharply"], "captions_pred_video": ["footage of lightning in the sky at night", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a frog is croaking", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a speedboat passes quickly on the water", "a propeller rotates loudly and intensely"], "sample_ids": ["tjmoSi330GM", "ugHJF0hfYkg"], "start_seconds": ["23", "10"], "properties": ["speed, water, boat", "loud, intense, propeller"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a helicopter is flying overhead "], "question": "which is quieter", "label": 0}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "water flows and trickles"], "sample_ids": ["vW4x7S1VfQc", "tB7hWb9gTuQ"], "start_seconds": ["150", "30"], "properties": ["clacking, oil, woman", "water, flow, trickle"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["food sizzles in a 
frying pan", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["yYEVLuqEytU", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["grunt, slurp, background", "animal, grunts, snorts"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a duck quacks loudly and continuously"], "sample_ids": ["sNB8zxXneIM", "vh30P49Po6s"], "start_seconds": ["20", "30"], "properties": ["several, quack, cocks", "loud, continuous, quacks"], "captions_pred_video": ["a group of geese in a cage", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a duck is quacking loudly"], "question": "which duck is quacking loudly", "label": 1}, {"captions": ["food is frying then a woman speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["ukxt9I7eMMg", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["food, woman, speak", "music, gunfire, explosion"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["water bubbles and gurgles.", "a toilet flushes and water drains unevenly"], "sample_ids": ["tB7hWb9gTuQ", "vhJWZheqaE"], "start_seconds": ["30", 
"0"], "properties": ["bubbles, gurgles, water", "water drains unevenly, toilet flushes, water drains"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", null], "captions_pred_audio": ["water is splashing and gurgling", "a toilet is flushed"], "question": "which entity is a toilet?", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vK93VuO0yNc", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["male voice, bus, rumble", "wind, blow, vehicle"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", null], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "water flows as men speak and yell"], "sample_ids": ["wSVhSdj0F0", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["beep, clang, footsteps", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking and yelling?", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tDlysoZiA1I", "xfaoyyzw2WU"], "start_seconds": ["0", "180"], "properties": ["animal, grunts, chirps", "loud, jet engine, roar"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["birds are chirping and 
a rooster is crowing ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["xyx6eNVEYRY", "xjvTpk2Zpr8"], "start_seconds": ["380", "70"], "properties": ["loud, engine, muffles", "wind, blows, vehicle"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["water splashes as an animal walks through", "wind blowing followed by a zoom"], "sample_ids": ["w1ir-sZ3Im8", "vr8ZXjEBhMQ"], "start_seconds": ["90", "150"], "properties": ["animal, water, splashes", "wind, blow, zoom"], "captions_pred_video": ["footage of a group of people riding horses through a river", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["water splashes and gurgles as people speak", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more calm", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["xSKJGCItUWE", "sSMl2vc3ek"], "start_seconds": ["10", "20"], "properties": ["engine, run, boy", "loud, multiple, distance"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a motor runs and stops, and animals squawk and croak", "a machine engine runs and a man speaks"], "sample_ids": ["s4tUs779vBA", "vs65y4qmyBE"], "start_seconds": ["160", "340"], "properties": ["a, sound, stop", "engine, run, man"], "captions_pred_video": 
["game in 10 words or less - screenshot 1", "a car is engulfed in flames on the side of the road"], "captions_pred_audio": ["a car is revving and a man is speaking ", "a heavy engine is running and men are speaking "], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "a machine beeps continuously"], "sample_ids": ["wnpJndXuxLc", "y682ml90jGw"], "start_seconds": ["50", "11"], "properties": ["beeps, loud, whistle", "beeps, machine, continuously"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a beeping sound is being made "], "question": "which entity is a machine", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "someone snores nearby"], "sample_ids": ["vh30P49Po6s", "spJCm8tD9Zo"], "start_seconds": ["30", "90"], "properties": ["loud, continuous, quacks", "someone snores, nearby, someone"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a duck is quacking loudly", "a person is snoring loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "an insect buzzes around continuously"], "sample_ids": ["uEU-Hg5MTN8", "v25l1jef3JY"], "start_seconds": ["27", "0"], "properties": ["a woman, laughs, animal", "buzzes, continuously, insect"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a fly is buzzing around a microphone "], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "a car accelerates 
and wind blows"], "sample_ids": ["uKCSGgof8gI", "u0TrcHhkPQ"], "start_seconds": ["12", "20"], "properties": ["chirps, distance, signal", "accelerates, wind, blows"], "captions_pred_video": ["footage of a street in a small town on a sunny day", null], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["x6ijhqRY38s", "tiDFTC-5vU"], "start_seconds": ["250", "30"], "properties": ["bowl, silverware, man", "male, duck, laugh"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck?", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wtDqrBygTcU", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["man, engine, run", "music, gunfire, explosion"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and a motor is running", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["a person is snoring while sleeping", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vJrjSeP17yE", "tiDFTC-5vU"], "start_seconds": ["40", "30"], "properties": ["a person is sleeping, snoring, person", "male, duck, laugh"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a man is 
speaking and ducks are quacking"], "question": "which entity is a person", "label": 0}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["uRExseg-0XI", "zj2R0XoFr5k"], "start_seconds": ["210", "50"], "properties": ["woman, man, water", "airplane, boy, fly"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a vehicle engine accelerating then running on idle"], "sample_ids": ["w34HjHr6gAY", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["beeps, hit, woman", "engine, accelerate, idle"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": 
["tK4VlLsNxak", "zj2R0XoFr5k"], "start_seconds": ["120", "50"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "airplane, boy, fly"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["vehicles pass by on a roadway", "goats bleat and metal clings"], "sample_ids": ["tgbONvsP47Y", "tH17JPjDPnc"], "start_seconds": ["0", "260"], "properties": ["pass, vehicle, roadway", "bleat, metal, clings"], "captions_pred_video": ["footage of a fire truck entering a garage", "feed of the goats eating hay in the barn"], "captions_pred_audio": ["a car is driving on the road ", "a cow is mooing and mechanisms are ticking "], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "vehicles pass by on a roadway"], "sample_ids": ["yks4cLgIDMc", "tgbONvsP47Y"], "start_seconds": ["170", "0"], "properties": ["background, speaking, child", "pass, vehicle, roadway"], "captions_pred_video": ["footage of two kids wrestling on the floor", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and a child is crying", "a car is driving on the road "], "question": "which entity is more active", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "birds chirp and objects are moved around"], "sample_ids": ["s4Uz1Ffgo04", "yPUYU6t3rwo"], "start_seconds": ["100", "370"], "properties": ["water, rushes, vehicle", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage and stock-footage/b-roll of a beekeeper 
opening a beehive"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "insects buzz and a man speaks"], "question": "which entity is more calm", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["zliInBdC98Y", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["a, baby, cries, wails", "engine revs, vehicle, people"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a baby cries and a woman speaks", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "some light rustling followed by a loud burp and a girl speaking"], "sample_ids": ["uYT5gxnyMWM", "vdoxuJn9lTc"], "start_seconds": ["50", "40"], "properties": ["female, spraying, scream", "burp, loud, girl"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a child speaks followed by a burp"], "question": "which entity is a burp", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a clock ticktocks"], "sample_ids": ["ylpYOorfH4o", "v-g-j2uTByM"], "start_seconds": ["410", "30"], "properties": ["engine, run, loud", "ticktocks, clock, ticktocks"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the 
fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a baby cries and a woman moans", "a telephone rings followed by a woman talking"], "sample_ids": ["smDKStoHBJo", "tGcFnX0GHI"], "start_seconds": ["0", "0"], "properties": ["a, cry, woman", "ring, talk, woman"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a motor runs and stops, and animals squawk and croak", "a man speaks as a car is passing by"], "sample_ids": ["s4tUs779vBA", "sK4u5T8hW78"], "start_seconds": ["160", "30"], "properties": ["a, sound, stop", "a, car, pass"], "captions_pred_video": ["game in 10 words or less - screenshot 1", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car is revving and a man is speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["some tunes played by whistling", "a duck quacks continuously"], "sample_ids": 
["u6BnG6YZqJ4", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["tune, play, whistling", "quacks, continuously, duck"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person whistling a song", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "a person snoring several times"], "sample_ids": ["ukg5L09Wpvo", "spJCm8tD9Zo"], "start_seconds": ["150", "90"], "properties": ["clickety-clack, train, whistle", "snore, person, several"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a person is snoring loudly"], "question": "which is louder", "label": 0}, {"captions": ["a person burps loudly for a long time nearby", "small dogs growl, bark and yip."], "sample_ids": ["vf44CgrjT0A", "sShpyu2l4YQ"], "start_seconds": ["20", "0"], "properties": ["loud, long, person", "growl, bark, yip"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "the puppies are playing with a toy"], "captions_pred_audio": ["a loud burp", "a dog is barking and growling"], "question": "which entity is more likely to be a dog", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "wind blows as people chatter quietly"], "sample_ids": ["yeFvk9x0wWI", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["clack, bird, chirp", "wind, chatter, people"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage is blurry and out of focus"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking with wind 
noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "water flows and trickles"], "sample_ids": ["xyx6eNVEYRY", "tB7hWb9gTuQ"], "start_seconds": ["380", "30"], "properties": ["loud, engine, muffles", "water, flow, trickle"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "a child speaks in closed space"], "sample_ids": ["y2ZBGpgbhHM", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["dog, chirp, breathe", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["birds chirping and a dog panting", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["xjvTpk2Zpr8", "su6FAOcOA8c"], "start_seconds": ["70", "4"], "properties": ["engine, run, wind", "engine, idle, woman"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "pigeons vocalize and birds chirp"], "sample_ids": ["tDVADusiIoc", "uiS58TNyUiw"], "start_seconds": ["60", "430"], "properties": ["wind, radio, 
waves", "vocalize, bird, chirp"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["y8dSeubCNI", "vbZ-0lGPneg"], "start_seconds": ["4", "30"], "properties": ["engine revving, people speaking, motorcycle", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["an engine revving and people talking in the background", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program playing?", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["tDVADusiIoc", "ukg5L09Wpvo"], "start_seconds": ["60", "150"], "properties": ["water, radio, man", "clickety-clack, train, whistle"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "a woman speaks as she rubs two objects together"], "sample_ids": ["smDKStoHBJo", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["a, talk, baby, cry", "two objects, woman, speak"], "captions_pred_video": ["a man holding a crying baby in his arms", "how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which woman is speaking", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "birds chirp and objects are moved around"], "sample_ids": ["w2JXXIAdUdg", "yPUYU6t3rwo"], "start_seconds": ["10", "370"], "properties": ["snoring, distance, person", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a person snoring and a dog whimpering", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["scraping and female speech with distant music", "vehicles pass by on a roadway"], "sample_ids": ["yHeVV-xeOxQ", "tgbONvsP47Y"], "start_seconds": ["130", "0"], "properties": ["female, speech, music", "pass, vehicle, roadway"], "captions_pred_video": ["of a girl milking a goat's udder", "footage of a fire truck 
entering a garage"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a car is driving on the road "], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "pigeons vocalize and birds chirp"], "sample_ids": ["yeFvk9x0wWI", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["clack, bird, chirp", "vocalize, bird, chirp"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "of the pigeon in the cage"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["wind blows strongly", "three men talk while wind blows and some liquid flows"], "sample_ids": ["w8uLijTqtlU", "vJ7JPEFhyLA"], "start_seconds": ["70", "16"], "properties": ["wind, blows, strongly", "three men, wind, flow"], "captions_pred_video": ["footage is blurry and shaky", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a liquid flowing?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a machine beeps continuously"], "sample_ids": ["vimzuGQvdcU", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["a, man, yells", "beeps, machine, continuously"], "captions_pred_video": ["a group of people are rafting down a river", null], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["a toilet flushes and water drains", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["sfAvvZwdLCY", "xfaoyyzw2WU"], "start_seconds": ["20", "180"], "properties": ["water drains, 
flushes, water", "loud, jet engine, roar"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a toilet is flushed", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a baby laugh at a sputter", "an engine revs and a turning noise is made"], "sample_ids": ["sLUnaPT5gM8", "tOSWIURC-4"], "start_seconds": ["0", "0"], "properties": ["laugh, sputter, baby", "noise, engine, revs"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a lawn mower is running "], "question": "which noise is made by an engine", "label": 1}, {"captions": ["a woman and man are speaking", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vbpKkWvfOu4", "sLUnaPT5gM8"], "start_seconds": ["560", "0"], "properties": ["two people, speaking, woman, man", "loud, laughter, intermittent"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["multiple people speak and children yell while water gurgles", "people speaking indiscriminately in the distance with a person snoring loudly nearby"], "sample_ids": ["vb1fPSDI4c", "w2JXXIAdUdg"], "start_seconds": ["30", "10"], "properties": ["multiple, people, yell", "snoring, distance, person"], "captions_pred_video": [null, "a close up shot of a person's mouth with a toothbrush in it"], 
"captions_pred_audio": ["a crowd of people are talking and laughing", "a person snoring and a dog whimpering"], "question": "which entity is more quiet", "label": 1}, {"captions": ["wind blowing followed by a zoom", "wind blowing followed by a zoom"], "sample_ids": ["vr8ZXjEBhMQ", "vr8ZXjEBhMQ"], "start_seconds": ["150", "150"], "properties": ["wind, blow, zoom", "wind, blow, zoom"], "captions_pred_video": ["is taken from a motorcycle's point of view", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is blowing the wind", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["uKCSGgof8gI", "uEU-Hg5MTN8"], "start_seconds": ["12", "27"], "properties": ["chirps, distance, signal", "a woman, laughs, animal"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a video of a woman laughing?", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["vlJS7LN2XyM", "wSVhSdj0F0"], "start_seconds": ["30", "10"], "properties": ["background, clocks, ticking", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "a car horn honks and keys jangle with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a helicopter 
engine idles continuously", "a horn rings out as a machine runs by"], "sample_ids": ["ugHJF0hfYkg", "slZLHwNbbt4"], "start_seconds": ["10", "300"], "properties": ["engine, idle, continuously", "a, horn, run"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a helicopter is flying overhead ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["dogs barking and whimpering", "a man speaks as a motor runs in the background"], "sample_ids": ["tIY7qOV3rEM", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["barking, whimpering, dog", "background, motor, run"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is quieter", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "a clock ticktocks"], "sample_ids": ["yDoT73BWsdA", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["engine revs, tires squeal, vehicle", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["yks4cLgIDMc", "xfaoyyzw2WU"], "start_seconds": ["170", "180"], "properties": ["background, speaking, child", "loud, jet engine, roar"], "captions_pred_video": ["footage of two kids wrestling on the floor", 
"footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking and a child is crying", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["ugHJF0hfYkg", "wSVhSdj0F0"], "start_seconds": ["10", "10"], "properties": ["loud, propeller, move", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a car horn honks and keys jangle with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "winds blows roughly as a vehicle races past"], "sample_ids": ["u21-Z5gJCB8", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["background, voice, man", "wind, blows, vehicle"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["a clock ticktocks continuously", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vlJS7LN2XyM", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks continuously", "gun, shoot, water"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a ticktock of a clock", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a 
war", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["yLy-WycbVVE", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["background, people, talk", "beeps, hit, woman"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "cats meow and then a person begins to talk while the cats continue to meow"], "sample_ids": ["xERFUeZONz8", "x5cuQjOdM3E"], "start_seconds": ["0", "30"], "properties": ["ring, approach, traffic", "cat, talk, meow"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["an emergency vehicle siren blares", "a cat meows and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vVhthZ45k3Y", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["cat, purr, hiss", "applause, audience, yells"], "captions_pred_video": ["footage 
is blurry and out of focus", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "people applaud and hoot and chat quietly"], "sample_ids": ["vmrxwuAMb2I", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["a dog, inhales, exhales", "people, applaud, hoot"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", null], "captions_pred_audio": ["a dog barks and growls", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["ugHJF0hfYkg", "zj2R0XoFr5k"], "start_seconds": ["10", "50"], "properties": ["loud, propeller, move", "airplane, boy, fly"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman speaks while a helicopter flies overhead "], "question": "which is not a moving object", "label": 0}, {"captions": ["people converse as a motor runs and air brakes hiss", "a man speaks as a motor runs in the background"], "sample_ids": ["zFjIWfSD-4", "xZepNM9qcRA"], "start_seconds": ["410", "30"], "properties": ["People, motor, brakes", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, 
{"captions": ["a pigeon cooing as an insect buzzes by briefly", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yZrFNS7GFBQ", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["pigeon, buzzes, insect", "water, radio, man"], "captions_pred_video": ["of the bird in the cage", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["tgbONvsP47Y", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["noise, truck, accelerate", "music, gunfire, explosion"], "captions_pred_video": ["footage of a fire truck entering a garage", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a car is driving on the road ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["multiple insects buzz over rustling wind", "a clock ticktocks"], "sample_ids": ["tMJne1a4AFI", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["wind, buzz, rustling", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a swarm of bees on the ground", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a swarm of bees buzzing around", "a clock is ticking loudly"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["a flush is followed by gurgling water, then another flush", "water splashes and a motorboat passes as people yell"], "sample_ids": ["tqR406bGiE", "w5W5Kqtc8E"], "start_seconds": ["40", "100"], "properties": ["flush, water, gurgle", "water, splashes, motorboat"], "captions_pred_video": [null, null], 
"captions_pred_audio": ["a toilet is flushed", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about water?", "label": 0}, {"captions": ["a train horn blows as it passes by", "an airplane engine runs"], "sample_ids": ["zVacuqSb4LI", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["horn, blows, train", "engine, airplane, runs"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a car is driving by on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["s6DESzUTGjY", "zFjIWfSD-4"], "start_seconds": ["16", "410"], "properties": ["wind, laugh, woman", "People, motor, brakes"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube 
how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", null], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tw76HGONaKg", "sSMl2vc3ek"], "start_seconds": ["570", "20"], "properties": ["A, game, keyboard", "loud, multiple, distance"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a person snoring loudly"], "question": "which entity is not a person?", 
"label": 0}, {"captions": ["a infant makes noise and is excited", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["wIJK3-5y0kA", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["noise, excited, infant", "harsh, wind, blows"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is not a person", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a car speeding up in the distance"], "sample_ids": ["wy1eKjR7KC0", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["people, talk, distance", "distance, car, speed"], "captions_pred_video": ["two police officers riding motorcycles down the street", null], "captions_pred_audio": ["a man is speaking and a siren is going off", "a race car accelerates and revs its engine "], "question": "which is farther away", "label": 0}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a infant makes noise and is excited"], "sample_ids": ["vW4x7S1VfQc", "wIJK3-5y0kA"], "start_seconds": ["150", "30"], "properties": ["clacking, oil, woman", "noise, excited, infant"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["food sizzles in a frying pan", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a clock ticktocks in wind", "a stream of water runs briefly"], "sample_ids": ["yVumC9TGknc", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["ticktocks, clock, wind", "stream, water, run"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "a man sitting on a rock in the 
middle of a river"], "captions_pred_audio": ["a series of beeps and chirps", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["xSKJGCItUWE", "vbZ-0lGPneg"], "start_seconds": ["10", "30"], "properties": ["engine, run, boy", "a woman, a television program, a bird"], "captions_pred_video": ["footage of the helicopter flying in the room", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vddP56-ogds", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["liquid, laughs, man", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a woman is speaking while food is frying in the background"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "a man talks followed by a woman shouting"], "sample_ids": ["sncRqQ67iJU", "s3cTDAj31g"], "start_seconds": ["460", "80"], "properties": ["loud, repeatedly, man", "man, talk, woman"], "captions_pred_video": ["of an airplane flying in the dark sky at night", null], "captions_pred_audio": ["a person is snoring", "a man is speaking and a baby is crying"], "question": "which entity is a man talking to a woman?", "label": 1}, {"captions": ["small dogs yip and bark sharply", "people applaud and hoot and chat quietly"], "sample_ids": ["v-wcQf4BDY0", "wwyfGO2J4"], "start_seconds": ["120", "90"], "properties": ["bark, yip, 
sharply", "people, applaud, hoot"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", null], "captions_pred_audio": ["a dog barks and growls", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a car accelerates and wind blows"], "sample_ids": ["vBHyYJ8pL0", "u0TrcHhkPQ"], "start_seconds": ["2", "20"], "properties": ["noise, door, opening", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a race car accelerates and revs its engine "], "question": "which entity is a car?", "label": 1}, {"captions": ["bees buzz and wind blows", "wind blows as people chatter quietly"], "sample_ids": ["tMJne1a4AFI", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["bees buzz, wind blows, bees", "wind, chatter, people"], "captions_pred_video": ["a swarm of bees on the ground", "footage is blurry and out of focus"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a duck quacks several times", "an infant crying frantically"], "sample_ids": ["vh30P49Po6s", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["quacks, duck, several", "cry, infant, frantically"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "of the baby crying in the car seat"], "captions_pred_audio": ["a duck is quacking loudly", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["uYT5gxnyMWM", "tw76HGONaKg"], "start_seconds": ["50", "570"], "properties": 
["person, spray, yell", "A, game, keyboard"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man speaks and types on a computer keyboard "], "question": "which entity is a video game?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "water running down a sink while a man is talking"], "sample_ids": ["tOSWIURC-4", "vSeGhaZt-aI"], "start_seconds": ["0", "50"], "properties": ["engine, work, nearby", "water, sink, talk"], "captions_pred_video": [null, "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a lawn mower is running ", "a man is speaking and pouring liquid with background noise "], "question": "which is a source of water", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vddP56-ogds", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["liquid, laughs, man", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy 
day"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["goats bleat and metal clings", "a boat travels through the waves as the wind blows loudly and a man speaks over a radio"], "sample_ids": ["tH17JPjDPnc", "tDVADusiIoc"], "start_seconds": ["260", "60"], "properties": ["bleat, metal, clings", "wind, radio, waves"], "captions_pred_video": ["feed of the goats eating hay in the barn", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["waves crash against a shoreline and people speak", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["yFB25fqfU8I", "wyllXV6PjKo"], "start_seconds": ["300", "30"], "properties": ["wave, crash, shoreline", "a baby, a woman, a man"], "captions_pred_video": ["footage of a person surfing in the ocean", null], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a woman speaks and a baby cries"], "question": "which entity is more likely to be a video of a baby crying?", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "a woman speaks happily and an animal chirps"], "sample_ids": ["zj2R0XoFr5k", "uWAAAL4CIoc"], "start_seconds": ["50", "0"], "properties": ["airplane, fly, overhead", "a woman, chirps, animal"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", null], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking and a dog is barking "], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "a duck quacks 
continuously"], "sample_ids": ["tDVADusiIoc", "vh30P49Po6s"], "start_seconds": ["60", "30"], "properties": ["man, radio, blows", "quacks, continuously, duck"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a duck is quacking loudly"], "question": "which entity is speaking", "label": 0}, {"captions": ["some people speak", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vbZ-0lGPneg", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "music, gunfire, explosion"], "captions_pred_video": ["of a man holding a baby duck in his hands", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a man speaks over intermittent keyboard taps"], "sample_ids": ["uzQnlJXBbOM", "tw76HGONaKg"], "start_seconds": ["50", "570"], "properties": ["ringing, beep, stop", "audio, man, keyboard"], "captions_pred_video": ["footage of a person using a cell phone on a table", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina 
of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a telephone rings and a man speaks", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a man speaks as a car is passing by"], "sample_ids": ["tQWGZLItBXk", "sK4u5T8hW78"], "start_seconds": ["170", "30"], "properties": ["voice, music, whoosh", "a, car, pass"], "captions_pred_video": ["worms revolution screenshots", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xOZfdgAgJ9o", "tDVADusiIoc"], "start_seconds": ["40", "60"], "properties": ["woman, whimpering, speaking", "water, radio, man"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and a baby is 
crying", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["an audience gives applause as a man yells and a group sings", "an airplane engine spools and people speak"], "sample_ids": ["tdWhHV3X25Q", "wTjoRj1se3U"], "start_seconds": ["60", "390"], "properties": ["applause, audience, yells", "airplane, engine, spool"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["continuous snoring", "a clock ticktocks"], "sample_ids": ["sLkeqCDJIyw", "v-g-j2uTByM"], "start_seconds": ["120", "30"], "properties": ["loud, snoring, noise", "ticktocks, clock, ticktocks"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a person is snoring loudly", "a clock is ticking loudly"], "question": "which entity makes a ticktocks noise", "label": 1}, {"captions": ["people speak and tapping occurs", "pigeons vocalize and birds chirp"], "sample_ids": ["tFCUUGdREgA", "uiS58TNyUiw"], "start_seconds": ["70", "430"], "properties": ["people, tap, speak", "vocalize, bird, chirp"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a clock ticktocks briefly", "a vehicle engine accelerating then running on idle"], "sample_ids": ["u7C-AEBQM", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks briefly", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a ticktock of a clock", "an engine is idling"], "question": "which is a moving object", "label": 1}, {"captions": ["water runs into a sink while men speak", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vzceMbklWc", "vfYTJq7nU"], "start_seconds": ["180", "130"], "properties": ["water, sink, run", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and a man is speaking", "a duck quacks and a woman speaks"], "question": "which entity is about water?", "label": 0}, {"captions": ["a helicopter engine runs continuously", "a man speaks as a car is passing by"], "sample_ids": ["ugHJF0hfYkg", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["engine, running, continuously", "a, car, pass"], "captions_pred_video": ["a man 
in a helicopter cockpit wearing headphones", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is stationary", "label": 1}, {"captions": ["a motorcycle engine is idling", "an airplane engine runs"], "sample_ids": ["vZAqdHZ81yA", "yVPZ2MNWpms"], "start_seconds": ["180", "0"], "properties": ["engine, motorcycle, idling", "engine, airplane, runs"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["an engine is idling loudly", "a car is driving by on the road "], "question": "which entity has a moving engine", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "waves crash against a shoreline and people speak"], "sample_ids": ["tw76HGONaKg", "yFB25fqfU8I"], "start_seconds": ["570", "300"], "properties": ["A, game, keyboard", "wave, crash, shoreline"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina 
of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which is a natural phenomenon", "label": 1}, {"captions": ["a beep repeats multiple times", "a stream of water flows as people talk and wind blows"], "sample_ids": ["y682ml90jGw", "xBxDz0CFVn0"], "start_seconds": ["11", "30"], "properties": ["beep, repeat, multiple", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a beeping sound is being made ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a duck quacks continuously"], "sample_ids": ["rqfQRErjfk8", "vh30P49Po6s"], "start_seconds": ["170", "30"], "properties": ["crowd, cheers, applauds", "quacks, continuously, duck"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a wooden clack accompanies nearby chirping birds"], "sample_ids": ["y8dSeubCNI", "yeFvk9x0wWI"], "start_seconds": ["4", "30"], "properties": ["men, women, car", "clack, bird, chirp"], "captions_pred_video": [null, "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["an engine revving and people talking in the 
background", "birds chirp in the background as a car drives by "], "question": "which entity is quieter", "label": 1}, {"captions": ["paper folding and crinkling", "some men converse over an engine running"], "sample_ids": ["zPpG3RD8lSs", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["paper, fold, crinkle", "men, converse, engine"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", null], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a man speaks while a motorcycle revs and accelerates "], "question": "which is a more active scene", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["v0x1odnXtP0", "tDVADusiIoc"], "start_seconds": ["210", "60"], "properties": ["keyboard, type, computer", "water, radio, man"], "captions_pred_video": ["how to make money on youtube in spanish", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a person is typing on a keyboard", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a horn blows 
as a train chugs along and warning bells ring", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["ukg5L09Wpvo", "tDVADusiIoc"], "start_seconds": ["150", "60"], "properties": ["a train, a horn, a bell", "water, radio, man"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a horn?", "label": 0}, {"captions": ["paper is repeatedly crumpled and crinkled", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vms5XGTDVQc", "wDVMhEdTiVw"], "start_seconds": ["220", "30"], "properties": ["paper, crumpled, crinkled", "gun, shoot, water"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["paper is crumpled and crinkled", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not a gun?", "label": 0}, {"captions": ["water is sprayed across a hard surface", "people applaud and hoot and chat quietly"], "sample_ids": ["sQwlkXjQabo", "wwyfGO2J4"], "start_seconds": ["10", "90"], "properties": ["water, spray, surface", "people, applaud, hoot"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", null], "captions_pred_audio": ["spraying followed by silence", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be a performance", "label": 1}, {"captions": ["a clock ticktocks briefly", "a crowd yells, reacts and applauds"], "sample_ids": ["u7C-AEBQM", "wztCSUxOf8"], "start_seconds": ["30", "130"], "properties": ["ticktocks, clock, ticktocks briefly", "a crowd, yells, applauds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a ticktock of a 
clock", "a man is speaking and a crowd is clapping"], "question": "which entity is more animated", "label": 1}, {"captions": ["an airplane engine spools and people speak", "waves crash against a shoreline and people speak"], "sample_ids": ["wTjoRj1se3U", "yFB25fqfU8I"], "start_seconds": ["390", "300"], "properties": ["airplane, engine, spool", "wave, crash, shoreline"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a jet engine is running and people are talking", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["y8dSeubCNI", "wDVMhEdTiVw"], "start_seconds": ["4", "30"], "properties": ["men, women, car", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["an engine revving and people talking in the background", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yRx9txMcBl0", "tDVADusiIoc"], "start_seconds": ["40", "60"], "properties": ["motors, tires, screech", "water, radio, man"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, 
gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water?", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "wind blows as people chatter quietly"], "sample_ids": ["x6ijhqRY38s", "xBxDz0CFVn0"], "start_seconds": ["250", "30"], "properties": ["something metal, glass, hit", "wind, chatter, people"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["wind blowing followed by a zoom", "water pouring and bubbling"], "sample_ids": ["vr8ZXjEBhMQ", "uyRfq-jKPpo"], "start_seconds": ["150", "50"], "properties": ["wind, blow, zoom", "water, bubbles, pouring"], "captions_pred_video": ["is taken from a motorcycle's point of view", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "water is running from a faucet"], "question": "which entity is more likely to be a video of a windy day?", "label": 0}, {"captions": ["an small aircraft engine runs and a boy speaks", "someone is typing on a computer keyboard"], "sample_ids": ["xSKJGCItUWE", "v0x1odnXtP0"], "start_seconds": ["10", "210"], "properties": ["engine, run, boy", "keyboard, type, computer"], "captions_pred_video": ["footage of the helicopter flying in the room", "how to make money on youtube in spanish"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a drill runs and two people laugh", "wind blows as people chatter quietly"], "sample_ids": ["tEE3MpBt1sg", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["two people, laugh, drill", "wind, chatter, people"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage is blurry and out of focus"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sEprKHm8Sj8", "vJ7JPEFhyLA"], "start_seconds": ["90", "16"], "properties": ["car, tires, slows", "three men, wind, flow"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a race car accelerates and 
revs its engine ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a car?", "label": 1}, {"captions": ["a church bell rings several times", "some men converse over an engine running"], "sample_ids": ["sUVVjE3Ucp8", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["ring, bell, several", "men, converse, engine"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", null], "captions_pred_audio": ["a church bell is ringing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "pigeons vocalize and birds chirp"], "sample_ids": ["ukg5L09Wpvo", "uiS58TNyUiw"], "start_seconds": ["150", "430"], "properties": ["clickety-clack, train, whistle", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "of the pigeon in the cage"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "a loud engine muffles a man as he speaks"], "sample_ids": ["t97k0cejSQE", "xyx6eNVEYRY"], "start_seconds": ["250", "380"], "properties": ["bird, chirp, insect", "loud, engine, muffles"], "captions_pred_video": ["a bee on a purple thistle flower", "footage of a helicopter landing on a runway at an airport"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "an aircraft engine is running and a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["ukxt9I7eMMg", "vb1fPSDI4c"], "start_seconds": ["30", "30"], 
"properties": ["food, pan, cook", "multiple, people, yell"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "some tunes played by whistling"], "sample_ids": ["vz8868znkVQ", "u6BnG6YZqJ4"], "start_seconds": ["60", "0"], "properties": ["audio, click, kid speaking", "tune, play, whistling"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a baby is laughing and breathing with background noise ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a clock ticktocks"], "sample_ids": ["xKB8O8LTs6s", "v-g-j2uTByM"], "start_seconds": ["70", "30"], "properties": ["music, radio, gunshots", "ticktocks, clock, ticktocks"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["xOZfdgAgJ9o", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["woman, whimpering, speaking", "stream, water, flow"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with wind 
noise in the background "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "a person snoring several times"], "sample_ids": ["zsLxS-uLJTw", "spJCm8tD9Zo"], "start_seconds": ["20", "90"], "properties": ["horn, blast, train", "snore, person, several"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a person is snoring loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a man speaks followed by another man speaking outside"], "sample_ids": ["sxYkFKFIZD0", "viuTg1M-dqg"], "start_seconds": ["20", "30"], "properties": ["screech, man, door", "two men, speak, follow"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker?", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "several insects fly while two men talk"], "sample_ids": ["rqu8iB22IY", "s-T9OVOiMLo"], "start_seconds": ["5", "330"], "properties": ["sound, repeats, laugh", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", 
"a man is speaking while insects are buzzing in the background "], "question": "which entity is about insects?", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "birds chirp and objects are moved around"], "sample_ids": ["siJFXfGWgDk", "yPUYU6t3rwo"], "start_seconds": ["50", "370"], "properties": ["a, bird, vehicle", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "insects buzz and a man speaks"], "question": "which entity has more birds", "label": 1}, {"captions": ["an insect buzzes around continuously", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["v25l1jef3JY", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["buzzes, continuously, insect", "applause, audience, yells"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is speaking and a crowd is clapping"], "question": "which entity is not a person", "label": 0}, {"captions": ["a person uses a saw to cut some wood", "a propeller rotates loudly and intensely"], "sample_ids": ["sHbXC6na9hg", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["a person, saw, wood", "loud, intense, propeller"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["an engine is idling and vibrating", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a helicopter engine idles continuously", "people speak in the background as a clock ticktocks"], "sample_ids": 
["ugHJF0hfYkg", "vZAw4apG0Es"], "start_seconds": ["10", "30"], "properties": ["engine, idle, continuously", "background, clock, ticktocks"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a helicopter is flying overhead ", "a clock is ticking and people are talking"], "question": "which entity is a clock", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "a toilet flushes and a female speaks"], "sample_ids": ["xSKJGCItUWE", "yaln9y8I7ms"], "start_seconds": ["10", "230"], "properties": ["engine, work, child", "female, flushes, toilet"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage is blurry and out of focus"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a toilet flushes and a man speaks"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["water flows as men speak and yell", "water pouring and bubbling"], "sample_ids": ["vJ7JPEFhyLA", "uyRfq-jKPpo"], "start_seconds": ["16", "50"], "properties": ["water, flow, men", "water, bubbles, pouring"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "a telephone rings followed by a woman talking"], "sample_ids": ["smDKStoHBJo", "tGcFnX0GHI"], "start_seconds": ["0", "0"], "properties": ["a, talk, baby, cry", "ring, talk, woman"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation between two people?", "label": 0}, {"captions": ["a man speaks as a boat engine runs", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["wtDqrBygTcU", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["man, engine, run", "men, talk, cars"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and a motor is running", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has a man speaking as an engine runs?", "label": 0}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "a person snores loudly multiple times 
at a close distance"], "sample_ids": ["w9lpbUn0hPc", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["male, wind, rustling", "loud, multiple, distance"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", null], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a clock ticks quietly and rhythmically"], "sample_ids": ["zofjfKhqLk8", "u7C-AEBQM"], "start_seconds": ["10", "30"], "properties": ["background, metal, clank", "ticks, rhythmic, quiet"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a ticktock of a clock"], "question": "which entity is quieter", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a person sniffs and sneezes"], "sample_ids": ["wy1eKjR7KC0", "uRlbY6aoBU"], "start_seconds": ["30", "0"], "properties": ["people, talk, distance", "sneezes, person, sniffs"], "captions_pred_video": ["two police officers riding motorcycles down the street", null], "captions_pred_audio": ["a man is speaking and a siren is going off", "a man is sneezing "], "question": "which entity is more likely to be a sneeze", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "wind blowing followed by a zoom"], "sample_ids": ["w-4gHptFNuU", "vr8ZXjEBhMQ"], "start_seconds": ["21", "150"], "properties": ["engine revs, accelerates, bump", "wind, blow, zoom"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a car accelerates and revs its engine ", "wind 
blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "people speak in a closed space"], "sample_ids": ["zofjfKhqLk8", "sTpirNYo8vQ"], "start_seconds": ["10", "30"], "properties": ["background, metal, clank", "people, space, speak"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of a man taking a selfie on a bus"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking while a car is revving and accelerating "], "question": "which entity is more quiet", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "a pigeon cooing as an insect buzzes by briefly"], "sample_ids": ["wvKpEYswXO0", "yZrFNS7GFBQ"], "start_seconds": ["150", "30"], "properties": ["plastic, tap, speak", "pigeon, buzzes, insect"], "captions_pred_video": ["of the person preparing food in the kitchen", "of the bird in the cage"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "an owl hoots in the background "], "question": "which entity is a bird?", "label": 1}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["smDKStoHBJo", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["a, infant, speaking", "a, scream, girl"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and a baby is crying"], "question": "which entity has a scream", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "a stream of water runs briefly"], "sample_ids": ["wwyfGO2J4", "x-PeY8Yb8M4"], "start_seconds": ["90", "300"], "properties": 
["people, applaud, hoot", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["xvDdE3zNf8Y", "w34HjHr6gAY"], "start_seconds": ["120", "30"], "properties": ["a, female, speaks", "beeps, hit, woman"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman speaks and crumples paper", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["someone whistles a tune", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sIXTftIuUgw", "xBxDz0CFVn0"], "start_seconds": ["90", "30"], "properties": ["someone, tune, whistle", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a telephone rings followed by a woman talking"], "sample_ids": ["wvKpEYswXO0", "tGcFnX0GHI"], 
"start_seconds": ["150", "0"], "properties": ["sound, water, running", "ring, talk, woman"], "captions_pred_video": ["of the person preparing food in the kitchen", null], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["se87d6yxEOA", "xKB8O8LTs6s"], "start_seconds": ["10", "70"], "properties": ["run, whistle, pass", "music, gunfire, explosion"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["food is frying while a woman speaks", "water splashes and a door squeaks"], "sample_ids": ["yhQ2Lg-7qDY", "sdXV-ylviw"], "start_seconds": ["130", "190"], "properties": ["food, woman, speak", "sound, splash, door"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a dog barks and taps with background noise "], "question": "which entity is silent", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["uYT5gxnyMWM", "zj2R0XoFr5k"], "start_seconds": ["50", "50"], "properties": ["a, scream, girl", "airplane, boy, fly"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], 
"captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about a girl speaking followed by a scream?", "label": 0}, {"captions": ["a beep repeats multiple times", "paper is crumpling consistently"], "sample_ids": ["y682ml90jGw", "v5cSxLaHADY"], "start_seconds": ["11", "0"], "properties": ["beep, repeat, multiple", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a beeping sound is being made ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vBHyYJ8pL0", "zFjIWfSD-4"], "start_seconds": ["2", "410"], "properties": ["noise, door, opening", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a door opening and closing?", "label": 0}, {"captions": ["a baby laugh at a sputter", "a child speaks in closed space"], "sample_ids": ["sLUnaPT5gM8", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["laugh, sputter, baby", "child, space, speak"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a child", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "an infant crying frantically"], "sample_ids": ["sNB8zxXneIM", "zwOBqeFTgiU"], "start_seconds": 
["20", "30"], "properties": ["several, quack, cocks", "cry, infant, frantically"], "captions_pred_video": ["a group of geese in a cage", "of the baby crying in the car seat"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "paper is crumpling consistently"], "sample_ids": ["tDlysoZiA1I", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["animal, grunt, multiple", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "a child speaks in closed space"], "sample_ids": ["slZLHwNbbt4", "yW6FWLSLkx4"], "start_seconds": ["300", "40"], "properties": ["train, horn, sound", "child, space, speak"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a man speaks as a car is passing by"], "sample_ids": ["weDbePuc-Xc", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["music, slaps, human", "a, car, pass"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["heavy rain splashes as it falls", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wP8ZKrlx3oA", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["fall, rain, splash", "a woman, something, fried"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of something being fried?", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "water pouring and bubbling"], "sample_ids": ["uZesmtKZGSw", "uyRfq-jKPpo"], "start_seconds": ["250", "50"], "properties": ["car, track, man", "water, bubbles, pouring"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "on youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "vehicles pass by on a roadway"], "sample_ids": ["ul60S8TXDA8", "tgbONvsP47Y"], "start_seconds": ["60", "0"], "properties": ["sound, distance, bell", "pass, vehicle, roadway"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["people speak softly as food sizzles", "vehicles pass by on a roadway"], "sample_ids": ["yhQ2Lg-7qDY", "tgbONvsP47Y"], "start_seconds": ["130", "0"], "properties": ["food, sizzle, speak", "pass, vehicle, roadway"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a machine beeps continuously", 
"multiple people speak and children yell while water gurgles"], "sample_ids": ["y682ml90jGw", "vb1fPSDI4c"], "start_seconds": ["11", "30"], "properties": ["beeps, machine, continuously", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a church bell rings several times", "someone is typing on a computer keyboard"], "sample_ids": ["sUVVjE3Ucp8", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["ring, bell, several", "keyboard, type, computer"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "how to make money on youtube in spanish"], "captions_pred_audio": ["a church bell is ringing ", "a person is typing on a keyboard"], "question": "which is not a type of computer", "label": 0}, {"captions": ["a vehicle engine runs and someone speaks", "a man speaks, then dials a rotary telephone"], "sample_ids": ["zF8yoL0rkbI", "tK4VlLsNxak"], "start_seconds": ["30", "120"], "properties": ["engine, run, someone", "a, dial, telephone"], "captions_pred_video": ["footage of the traffic on the street at night", "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a man is speaking and using a sewing machine"], "question": "which entity is a rotary telephone?", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "vehicle engines race around a track as a man commentates"], "sample_ids": ["spJCm8tD9Zo", "sZPuqDgX2V0"], "start_seconds": ["90", "30"], "properties": ["snores, wheezes, sleeps", "commentator, race, track"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a helicopter is flying overhead "], 
"question": "which entity is a video of a race?", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "three men talk while wind blows and some liquid flows"], "sample_ids": ["t25U-v4k4ts", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["a, chirps, bird", "three men, wind, flow"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more moving parts", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a man speaks as a car is passing by"], "sample_ids": ["s4Uz1Ffgo04", "sK4u5T8hW78"], "start_seconds": ["100", "30"], "properties": ["roars, background, people speaking", "a, car, pass"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is quieter", "label": 1}, {"captions": ["a car accelerates and wind blows", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["u0TrcHhkPQ", "vbZ-0lGPneg"], "start_seconds": ["20", "30"], "properties": ["accelerates, wind, blows", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck 
in his hands"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "some tunes played by whistling"], "sample_ids": ["xZepNM9qcRA", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["background, motor, run", "tune, play, whistling"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "water flows and trickles"], "sample_ids": ["vBslzh7saPw", "tB7hWb9gTuQ"], "start_seconds": ["90", "30"], "properties": ["engine, roar, louder", "water, flow, trickle"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a jet engine roars and accelerates ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["long loud burping by a man", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["xmiUIOhtZyQ", "wqZ135Ssz0"], "start_seconds": ["60", "60"], "properties": ["loud, burp, man", "two men, woman, birds"], "captions_pred_video": ["homer simpson drinking a beer", null], "captions_pred_audio": ["a person burps and music plays in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["two frogs croak at each other", "a door slams shut roughly"], "sample_ids": ["zg0X6BnhOLQ", "zkKdxzNC97Y"], "start_seconds": ["410", "27"], 
"properties": ["two frogs, croak, at each other", "a door, slams, shut"], "captions_pred_video": ["footage of lightning in the sky at night", "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a frog is croaking", "a door is opened and closed"], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "small dogs yip and bark sharply"], "sample_ids": ["wIvYjuR3nrg", "v-wcQf4BDY0"], "start_seconds": ["9", "120"], "properties": ["birds, pigeons, vocalize", "bark, yip, sharply"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["birds are chirping and cooing", "a dog barks and growls"], "question": "which animal is more vocal", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "people speak as gunfire rings out"], "sample_ids": ["vddP56-ogds", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["water, splash, person, laugh", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["wind blows strongly", "some clanking with distant murmuring"], "sample_ids": ["w8uLijTqtlU", "uMTTDZ2mb4"], "start_seconds": ["70", "30"], "properties": ["wind, blows, strongly", "clanking, murmuring, distant"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "people are talking and a car is driving by with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man makes an exclamation, then another man 
speaks", "water is sprayed across a hard surface"], "sample_ids": ["xO-Q2BlIIPU", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["two men, exclamation, speak", "water, spray, surface"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "spraying followed by silence"], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "plastic is tapped on while someone speaks"], "sample_ids": ["zO-LSSY92ZM", "wvKpEYswXO0"], "start_seconds": ["30", "150"], "properties": ["liquid, surface, sound", "plastic, tap, speak"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", "of the person preparing food in the kitchen"], "captions_pred_audio": ["steam is hissing and hissing", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on", "label": 1}, {"captions": 
["birds chirp in the background while a horse neighs followed by a girl speaking", "wind blows as people chatter quietly"], "sample_ids": ["s59PfAghdkM", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["bird, chirp, background, horse, neigh", "wind, chatter, people"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "a car speeding up in the distance"], "sample_ids": ["x5cuQjOdM3E", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["cat, talk, meow", "distance, car, speed"], "captions_pred_video": ["a black background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "pigeons vocalize and birds chirp"], "sample_ids": ["zcDwZ6W7E3E", "uiS58TNyUiw"], "start_seconds": ["180", "430"], "properties": ["a, man, speak", "vocalize, bird, chirp"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a pigeon cooing as an insect buzzes by briefly"], "sample_ids": ["yaln9y8I7ms", "yZrFNS7GFBQ"], "start_seconds": ["230", "30"], "properties": ["female, flushes, toilet", "pigeon, buzzes, insect"], 
"captions_pred_video": ["footage is blurry and out of focus", "of the bird in the cage"], "captions_pred_audio": ["a toilet flushes and a man speaks", "an owl hoots in the background "], "question": "which entity is a bird?", "label": 1}, {"captions": ["goats bleat and people speak", "a child speaks in closed space"], "sample_ids": ["z5iUE5h0EPs", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["goats bleat, people speak, language", "child, space, speak"], "captions_pred_video": ["of the goat in the barn", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a goat bleats and a man speaks", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a person is snoring while sleeping", "a car accelerates and wind blows"], "sample_ids": ["vJrjSeP17yE", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["a person is sleeping, snoring, person", "accelerates, wind, blows"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "winds blows roughly as a vehicle races past"], "sample_ids": ["xM4joTqDVp4", "xjvTpk2Zpr8"], "start_seconds": ["160", "70"], "properties": ["background, chirp, birds", "wind, blows, vehicle"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "music plays and 
someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sapQIQUhFc", "xKB8O8LTs6s"], "start_seconds": ["280", "70"], "properties": ["liquid, flow, distance", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a woman speaks happily and an animal chirps"], "sample_ids": ["tDlysoZiA1I", "uWAAAL4CIoc"], "start_seconds": ["0", "0"], "properties": ["animal, grunts, chirps", "a woman, chirps, animal"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", null], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking and a dog is barking "], "question": "which entity has a woman speaking happily and an animal chirps?", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vr8ZXjEBhMQ", "zl9Dqx-j7q4"], "start_seconds": ["150", "6"], "properties": ["wind, blow, zoom", "engine, laugh, loud"], "captions_pred_video": ["is taken from a motorcycle's point of view", "footage of a man driving a car in the dark"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a heavy rain falls endlessly", "paper folding and crinkling"], "sample_ids": ["wP8ZKrlx3oA", "zPpG3RD8lSs"], "start_seconds": ["40", "20"], "properties": ["heavy, rain, fall", "paper, fold, crinkle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day 
card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a heavy rain is falling on a surface", "the wind blows and a mouse clicks "], "question": "which entity is not a natural phenomenon", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "a toilet flushes and a female speaks"], "sample_ids": ["x6ijhqRY38s", "yaln9y8I7ms"], "start_seconds": ["250", "230"], "properties": ["something metal, glass, hit", "female, flushes, toilet"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["people speak and tapping occurs", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["tFCUUGdREgA", "uEU-Hg5MTN8"], "start_seconds": ["70", "27"], "properties": ["people, tap, speak", "a woman, laughs, animal"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a woman is speaking and a baby is 
crying"], "question": "which entity has a more snorting animal", "label": 1}, {"captions": ["someone whistles a song", "a clock ticktocks"], "sample_ids": ["sIXTftIuUgw", "v-g-j2uTByM"], "start_seconds": ["90", "30"], "properties": ["someone, song, whistle", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a person whistling a song", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["wind blows and a stream of water flows nearby", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["sYITalLZjj4", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["stream, flow, wind", "wind, blow, vehicle"], "captions_pred_video": ["two ducks are swimming in the water near each other", null], "captions_pred_audio": ["wind blows and birds chirp", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a stream of water flowing nearby?", "label": 0}, {"captions": ["birds chirps while a siren signals in the distance", "paper is crumpling consistently"], "sample_ids": ["uKCSGgof8gI", "v5cSxLaHADY"], "start_seconds": ["12", "0"], "properties": ["chirps, distance, signal", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["birds fly and flutter around", "several insects fly while two men talk"], "sample_ids": ["wGKgwOP3h30", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["fly, flutter, around", "several, fly, men"], "captions_pred_video": ["of the pigeons in the coop", "a man climbing up a tree using a rope to reach the top of the tree"], 
"captions_pred_audio": ["pigeons coo and flap their wings", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video of flying?", "label": 0}, {"captions": ["a large bell chimes back and forth loudly", "vehicles pass by on a roadway"], "sample_ids": ["w2M4i1mklOA", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["loud, chime, bell", "pass, vehicle, roadway"], "captions_pred_video": ["footage of an antique clock", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "a man is filing a hard object"], "sample_ids": ["yI-KvObbDoY", "vveS8HT7Uog"], "start_seconds": ["260", "100"], "properties": ["sound, smack, wind", "a man, hard, object"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "footage is of a workbench with various tools on it including a hammer and a screwdriver"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a man is filing and speaking with background noise and breathing "], "question": "which entity is about a man filing a hard object?", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "an infant crying frantically"], "sample_ids": ["yswmmRZFItk", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["background, frog, croak", "cry, infant, frantically"], "captions_pred_video": ["a close up of a frog in the water", "of the baby crying in the car seat"], "captions_pred_audio": ["a frog is croaking", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["rwTERCUno", "w5W5Kqtc8E"], 
"start_seconds": ["90", "100"], "properties": ["engine, idle, sputter", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine is idling and vibrating", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a child babbles as a woman speaks"], "sample_ids": ["sfAvvZwdLCY", "wEBlkGWVWwE"], "start_seconds": ["20", "260"], "properties": ["water drains, flushes, water", "a, babble, woman"], "captions_pred_video": ["footage of the toilet in the bathroom", "shows a person writing on the whiteboard"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and a child is speaking with background noise and clapping "], "question": "which entity is a human", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a car accelerates and wind blows"], "sample_ids": ["sfAvvZwdLCY", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["flushes, drains, water", "accelerates, wind, blows"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a duck quacks continuously"], "sample_ids": ["wyllXV6PjKo", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["a kid, talk, cry", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman speaks and a baby cries", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["smDKStoHBJo", "tDVADusiIoc"], "start_seconds": ["0", "60"], "properties": ["a, 
talk, baby, cry", "water, radio, man"], "captions_pred_video": ["a man holding a crying baby in his arms", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["tapping occurs then a baby cries", "a man speaks as a motor runs in the background"], "sample_ids": ["wIJK3-5y0kA", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["a, cry, baby", "background, motor, run"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "a child speaks in closed space"], "sample_ids": ["y2ZBGpgbhHM", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["animal, growl, bird", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["birds chirping and a dog panting", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vuUVPzd2FXw", "vfYTJq7nU"], "start_seconds": ["160", "130"], "properties": ["a, steam, release", "rustling, ducks, quack"], "captions_pred_video": ["of the person cooking on the grill with a spatula", null], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a duck 
quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "water pouring and bubbling"], "sample_ids": ["yeFvk9x0wWI", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["clack, bird, chirp", "water, bubbles, pouring"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "some men converse over an engine running"], "sample_ids": ["sShpyu2l4YQ", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["growl, bark, yip", "men, converse, engine"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "a stream of water flows as people talk and wind blows"], "sample_ids": ["uJV8NDaHqqk", "xBxDz0CFVn0"], "start_seconds": ["100", "30"], "properties": ["loud, fly, 
chirp", "stream, water, flow"], "captions_pred_video": ["a bee hive in a wooden box", "footage is blurry and out of focus"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "an adult male speaks and dials a rotary phone"], "sample_ids": ["tOj4tdLRaA", "tK4VlLsNxak"], "start_seconds": ["70", "120"], "properties": ["woman, laugh, baby", "An adult male speaks, dials, and speaks into a rotary phone"], "captions_pred_video": [null, "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking and using a sewing machine"], "question": "which entity is a person", "label": 1}, {"captions": ["water flows as men speak and yell", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vJ7JPEFhyLA", "zj2R0XoFr5k"], "start_seconds": ["16", "50"], "properties": ["water, flow, men", "airplane, boy, fly"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["spJCm8tD9Zo", "zj2R0XoFr5k"], "start_seconds": ["90", "50"], "properties": ["snores, wheezes, sleeps", "airplane, boy, fly"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person is snoring loudly", "a 
woman speaks while a helicopter flies overhead "], "question": "which entity is moving", "label": 1}, {"captions": ["a car accelerates and wind blows", "water pouring and bubbling"], "sample_ids": ["u0TrcHhkPQ", "uyRfq-jKPpo"], "start_seconds": ["20", "50"], "properties": ["accelerates, wind, blows", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "someone whistles a tune"], "sample_ids": ["xyL9F5VrjkE", "sIXTftIuUgw"], "start_seconds": ["20", "90"], "properties": ["wind, motor, distance", "someone, tune, whistle"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a person whistling a song"], "question": "which is not a musical instrument", "label": 0}, {"captions": ["a duck quacks loudly and continuously", "some men converse over an engine running"], "sample_ids": ["vh30P49Po6s", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["loud, continuous, quacks", "men, converse, engine"], 
"captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is quieter", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "a bird chirps in response to a woman chirping for the birds"], "sample_ids": ["uJV8NDaHqqk", "uOpoD0gGXcs"], "start_seconds": ["100", "120"], "properties": ["loud, fly, chirp", "chirps, woman, bird"], "captions_pred_video": ["a bee hive in a wooden box", "a herd of cows grazing in the field"], "captions_pred_audio": ["a swarm of bees buzzing around", "birds are chirping and a man is speaking"], "question": "which entity is a response to a human chirping?", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "frogs croak and vocalize"], "sample_ids": ["w6RTHR6AeAg", "yswmmRZFItk"], "start_seconds": ["40", "0"], "properties": ["call, owl, screech", "croak, vocalize, frog"], "captions_pred_video": [null, "a close up of a frog in the water"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a frog is croaking"], "question": "which animal is more likely to be a predator", "label": 1}, {"captions": ["continuous sneezing together with speech", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["x4dZyf9Gbj0", "ukg5L09Wpvo"], "start_seconds": ["130", "150"], "properties": ["continuous, sneeze, speech", "clickety-clack, train, whistle"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a woman sneezes and speaks", "a train blows its whistle and blows its horn "], "question": "which entity is continuous", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "water flows as men speak and yell"], "sample_ids": 
["vuUVPzd2FXw", "vJ7JPEFhyLA"], "start_seconds": ["160", "16"], "properties": ["a, steam, release", "water, flow, men"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man talking while metallic objects are rapped and steam is released?", "label": 0}, {"captions": ["a baby cries and a woman speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["tMbMDvT50j8", "xBxDz0CFVn0"], "start_seconds": ["12", "30"], "properties": ["a, cry, woman", "stream, water, flow"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["scraping and female speech with distant music", "an infant crying as a woman laughs"], "sample_ids": ["yHeVV-xeOxQ", "xhmRY9yhC7c"], "start_seconds": ["130", "20"], "properties": ["female, speech, music", "a, laugh, infant"], "captions_pred_video": ["of a girl milking a goat's udder", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "someone is typing on a computer keyboard"], "sample_ids": ["uEU-Hg5MTN8", "v0x1odnXtP0"], "start_seconds": ["27", "210"], "properties": ["animal, grunts, snorts", "keyboard, type, computer"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "how to make money on youtube in spanish"], "captions_pred_audio": 
["a woman is speaking and a baby is crying", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vlS6YMeWAPo", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["sheep, baa, birds", "a woman, something, fried"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a goat bleats and birds chirp", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["multiple ducks quack continuously", "goats bleat and people speak"], "sample_ids": ["wfHeoPDLMaM", "z5iUE5h0EPs"], "start_seconds": ["30", "30"], "properties": ["multiple, quack, continuously", "goats bleat, people speak, language"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "of the goat in the barn"], "captions_pred_audio": ["ducks are quacking", "a goat bleats and a man speaks"], "question": "which entity is speaking a 
language", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["y1saVTXsKwc", "tdWhHV3X25Q"], "start_seconds": ["80", "60"], "properties": ["a, dog, talk", "applause, audience, yells"], "captions_pred_video": ["a dog playing with a pink ball", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a dog barks and a man speaks", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a woman sneezes then speaks", "an infant crying as a woman laughs"], "sample_ids": ["x4dZyf9Gbj0", "xhmRY9yhC7c"], "start_seconds": ["130", "20"], "properties": ["sneezes, speaks, woman", "a, laugh, infant"], "captions_pred_video": ["footage is blurry and out of focus", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman sneezes and speaks", "a baby cries and a woman speaks"], "question": "which woman is laughing", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["t69a8aRKhmc", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["a, b, c", "applause, audience, yells"], "captions_pred_video": ["footage is blurry and out of focus", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity has more people", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a person sniffs and sneezes"], "sample_ids": ["vqZuVbG6-HI", "uRlbY6aoBU"], "start_seconds": ["130", "0"], "properties": ["background, male, female", "sneezes, person, sniffs"], "captions_pred_video": ["footage is blurry because it's raining outside", 
null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man is sneezing "], "question": "which entity is a person", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "an engine runs loudly"], "sample_ids": ["u--KhUW8l1Y", "vqZuVbG6-HI"], "start_seconds": ["0", "130"], "properties": ["horn, siren, life", "loud, engine, run"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "plastic is tapped on while someone speaks"], "sample_ids": ["yYEVLuqEytU", "wvKpEYswXO0"], "start_seconds": ["40", "150"], "properties": ["grunt, slurp, background", "plastic, tap, speak"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "of the person preparing food in the kitchen"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["spYNpeN7rPY", "wz7N8YRy74I"], "start_seconds": ["1", "30"], "properties": ["a clock, ticktock, man", "rooster, crow, background, men"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a clock in the background?", "label": 0}, {"captions": ["a woman talks while something is fried and objects are tapped", "birds chirp and objects are moved around"], "sample_ids": ["yajyRTUQk3U", "yPUYU6t3rwo"], "start_seconds": ["400", "370"], "properties": ["a woman, something, fried", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "insects buzz and a man speaks"], "question": "which entity is about birds?", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "a woman speaks happily and an animal chirps"], "sample_ids": ["ukg5L09Wpvo", 
"uWAAAL4CIoc"], "start_seconds": ["150", "0"], "properties": ["a train, a horn, a bell", "a woman, chirps, animal"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", null], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vBHyYJ8pL0", "yajyRTUQk3U"], "start_seconds": ["2", "400"], "properties": ["noise, door, opening", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["some men converse over an engine running", "water is sprayed across a hard surface"], "sample_ids": ["sCiy7QS1U", "sQwlkXjQabo"], "start_seconds": ["300", "10"], "properties": ["men, converse, engine", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a child babbles as a woman speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wEBlkGWVWwE", "vfYTJq7nU"], "start_seconds": ["260", "130"], "properties": ["a, babble, woman", "rustling, ducks, quack"], "captions_pred_video": ["shows a person writing on the whiteboard", null], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a duck quacks and a woman speaks"], 
"question": "which entity is about a child?", "label": 0}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "waves crash against a shoreline and people speak"], "sample_ids": ["t69a8aRKhmc", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["a, b, c", "wave, crash, shoreline"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more calm", "label": 0}, {"captions": ["birds chirp and a dog breathes heavily", "water is sprayed across a hard surface"], "sample_ids": ["y2ZBGpgbhHM", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["dog, chirp, breathe", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["birds chirping and a dog panting", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["tDVADusiIoc", "uEU-Hg5MTN8"], "start_seconds": ["60", "27"], "properties": ["wind, radio, waves", "a woman, laughs, animal"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be in a boat", "label": 0}, {"captions": ["a vehicle engine runs and someone speaks", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["zF8yoL0rkbI", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["engine, run, someone", 
"loud, jet engine, roar"], "captions_pred_video": ["footage of the traffic on the street at night", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "a large crowd cheers and applauds"], "sample_ids": ["xl2PIWyXaM", "rqfQRErjfk8"], "start_seconds": ["160", "170"], "properties": ["chirp, man, younger person", "crowd, cheers, applauds"], "captions_pred_video": [null, "a man hugging another man in front of an orchestra"], "captions_pred_audio": ["birds are chirping and people are talking", "a crowd of people clapping and cheering"], "question": "which entity is more active", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zofjfKhqLk8", "tiDFTC-5vU"], "start_seconds": ["10", "30"], "properties": ["background, metal, clings", "male, duck, laugh"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a child speaks in closed space"], "sample_ids": ["vfYTJq7nU", "yW6FWLSLkx4"], "start_seconds": ["130", "40"], "properties": ["rustling, ducks, quack", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", 
"label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a man speaks followed by another man speaking outside"], "sample_ids": ["xKB8O8LTs6s", "viuTg1M-dqg"], "start_seconds": ["70", "30"], "properties": ["music, radio, gunshots", "two men, speak, follow"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a woman speaking on a radio?", "label": 0}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["xl2PIWyXaM", "zFjIWfSD-4"], "start_seconds": ["160", "410"], "properties": ["chirp, man, younger person", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and people are talking", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "a car accelerates and wind blows"], "sample_ids": ["zcDwZ6W7E3E", "u0TrcHhkPQ"], "start_seconds": ["180", "20"], "properties": ["a, man, speak", "accelerates, wind, blows"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a race car accelerates and revs its engine "], "question": "which is not a person speaking", "label": 1}, {"captions": ["a clock ticktocks continuously", "a duck quacks continuously"], "sample_ids": ["vlJS7LN2XyM", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks continuously", "quacks, continuously, duck"], 
"captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a ticktock of a clock", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a person is snoring while sleeping", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vJrjSeP17yE", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["a person is sleeping, snoring, person", "a woman, something, fried"], "captions_pred_video": ["a black background with a small plane flying in the sky", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 0}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wnpJndXuxLc", "tDVADusiIoc"], "start_seconds": ["50", "60"], "properties": ["beeps, loud, whistle", "water, radio, man"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["uiItxDsDMFI", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["wood, piece, saw", "rooster, crow, background, men"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "footage of the sun shining through 
the clouds on a cloudy day"], "captions_pred_audio": ["a saw is being used with background noise ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "people cheer as a vehicle engine revs"], "sample_ids": ["s4Uz1Ffgo04", "xjhAnI2q6hM"], "start_seconds": ["100", "6"], "properties": ["roars, background, people speaking", "engine revs, vehicle, people"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a truck is revving its engine and a man is speaking "], "question": "which vehicle is revving", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["xvDdE3zNf8Y", "su6FAOcOA8c"], "start_seconds": ["120", "4"], "properties": ["A, crumple, paper", "engine, idle, woman"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman speaks and crumples paper", "a woman is speaking and a subway train is moving "], "question": "which woman is speaking", "label": 1}, {"captions": ["a motor runs and stops, and animals squawk and croak", "small dogs yip and bark sharply"], "sample_ids": ["s4tUs779vBA", "v-wcQf4BDY0"], "start_seconds": ["160", "120"], "properties": ["a, sound, stop", "bark, yip, sharply"], "captions_pred_video": ["game in 10 words or less - screenshot 1", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a car is revving and a man is speaking ", "a dog barks and growls"], "question": "which entity is more aggressive", 
"label": 1}, {"captions": ["an airplane engine runs", "wind blows as people chatter quietly"], "sample_ids": ["yVPZ2MNWpms", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["engine, airplane, runs", "wind, chatter, people"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage is blurry and out of focus"], "captions_pred_audio": ["a car is driving by on the road ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wqUmIEzuNz4", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["frog, bird, vocalize", "a woman, laughs, animal"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a cat meows and rustles", "a woman is speaking and a baby is crying"], "question": "which entity is a human", "label": 1}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "several insects fly while two men talk"], "sample_ids": ["smDKStoHBJo", "s-T9OVOiMLo"], "start_seconds": ["0", "330"], "properties": ["a, infant, speaking", "several, fly, men"], "captions_pred_video": ["a man holding a crying baby in his arms", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["wind blows and a stream of water flows nearby", "a car speeding up in the distance"], "sample_ids": ["sYITalLZjj4", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["stream, flow, wind", "distance, car, speed"], "captions_pred_video": ["two ducks are swimming in the water near each other", null], 
"captions_pred_audio": ["wind blows and birds chirp", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "a propeller rotates loudly and intensely"], "sample_ids": ["wqZ135Ssz0", "ugHJF0hfYkg"], "start_seconds": ["60", "10"], "properties": ["two men, woman, birds", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a helicopter is flying overhead "], "question": "which entity is more quiet", "label": 0}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a train engine runs and a horn blows"], "sample_ids": ["zY3icUyMdh8", "zPX9o1uDiI"], "start_seconds": ["20", "40"], "properties": ["dog, bark, engine", "engine, horn, run"], "captions_pred_video": ["footage of a bus driving through a residential street at night", null], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a train moves with its horn blowing and wheels squealing "], "question": "which entity is a train", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "a stream of water flows as people talk and wind blows"], "sample_ids": ["w9lpbUn0hPc", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["male, wind, rustling", "stream, water, flow"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["wind blowing followed by a zoom", "pigeons 
vocalize and birds chirp"], "sample_ids": ["vr8ZXjEBhMQ", "uiS58TNyUiw"], "start_seconds": ["150", "430"], "properties": ["wind, blow, zoom", "vocalize, bird, chirp"], "captions_pred_video": ["is taken from a motorcycle's point of view", "of the pigeon in the cage"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["an aircraft engine runs", "roadway noise occurs and a truck accelerates"], "sample_ids": ["yLCORCnd35Q", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["engine, aircraft, runs", "noise, truck, accelerate"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a car is driving on the road "], "question": "which is not a source of noise", "label": 0}, {"captions": ["an airplane engine runs", "a man talks as several small engines run"], "sample_ids": ["yVPZ2MNWpms", "u9A6VZQCZpU"], "start_seconds": ["0", "30"], "properties": ["engine, airplane, runs", "a, man, talk"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", null], "captions_pred_audio": ["a car is driving by on the road ", "a man is speaking while a race car is revving and accelerating "], "question": "which entity is a person", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "someone is typing on a computer keyboard"], "sample_ids": ["vzceMbklWc", "v0x1odnXtP0"], "start_seconds": ["180", "210"], "properties": ["water, faucet, sink", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["water is running and a man is speaking", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard?", "label": 1}, 
{"captions": ["a vehicle engine runs and wind blows before women yell", "a man speaks as a car is passing by"], "sample_ids": ["w5W5Kqtc8E", "sK4u5T8hW78"], "start_seconds": ["100", "30"], "properties": ["wind, blow, vehicle", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "water flows as men speak and yell"], "sample_ids": ["zhx6hoYrHeI", "vJ7JPEFhyLA"], "start_seconds": ["160", "16"], "properties": ["engine, sputter, rough", "water, flow, men"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking and yelling?", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "paper is crumpling consistently"], "sample_ids": ["w2M4i1mklOA", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["alarm, gears, turn", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of an antique clock", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "paper is crumpled and crinkled"], "question": "which entity is 
crumpling", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "paper folding and crinkling"], "sample_ids": ["yDoT73BWsdA", "zPpG3RD8lSs"], "start_seconds": ["10", "20"], "properties": ["engine revs, tires squeal, vehicle", "paper, fold, crinkle"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "the wind blows and a mouse clicks "], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["wind blows as people chatter quietly", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["xBxDz0CFVn0", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["wind, chatter, people", "background, birds, rustling"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 0}, {"captions": ["birds vocalize and a man speaks", "plastic is tapped on while someone speaks"], 
"sample_ids": ["v0wPrLBI3hg", "wvKpEYswXO0"], "start_seconds": ["30", "150"], "properties": ["vocalize, bird, speak", "plastic, tap, speak"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "of the person preparing food in the kitchen"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "male speech followed by light wind, rustling, distant speech and brief hissing"], "sample_ids": ["tDlfY3nmx1A", "w9lpbUn0hPc"], "start_seconds": ["160", "30"], "properties": ["applause, laugh, man", "male, wind, rustling"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "footage of a man in a black shirt standing in front of a white truck in a parking lot"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a man is speaking with wind noise and breathing sounds in the background "], "question": "which entity is a speech?", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a horn rings out as a machine runs by"], "sample_ids": ["w5W5Kqtc8E", "slZLHwNbbt4"], "start_seconds": ["100", "300"], "properties": ["wind, engine, scream", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "water flows as men speak and yell"], "sample_ids": ["ylpYOorfH4o", "vJ7JPEFhyLA"], "start_seconds": ["410", "16"], "properties": ["motor, run, steady", "water, flow, 
men"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["a woman talks while something is fried and objects are tapped", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yajyRTUQk3U", "tDVADusiIoc"], "start_seconds": ["400", "60"], "properties": ["a woman, something, fried", "water, radio, man"], "captions_pred_video": ["- a woman cooking in the kitchen", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman talking?", "label": 0}, {"captions": ["a car speeding up in the distance", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["u0TrcHhkPQ", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["distance, car, speed", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and 
a baby is crying"], "question": "which entity is not a car?", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wnpJndXuxLc", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["beeps, loud, whistle", "three men, wind, flow"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "a clock ticktocks"], "sample_ids": ["s4Uz1Ffgo04", "v-g-j2uTByM"], "start_seconds": ["100", "30"], "properties": ["water, rushes, vehicle", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "an engine runs loudly"], "sample_ids": ["vlS6YMeWAPo", "vqZuVbG6-HI"], "start_seconds": ["40", "130"], "properties": ["noise, bleat, call", "loud, engine, run"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a goat bleats and birds chirp", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "small dogs yip and bark sharply"], "sample_ids": ["viuTg1M-dqg", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["two men, 
speak, follow", "bark, yip, sharply"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["continuous sneezing together with speech", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["x4dZyf9Gbj0", "tDVADusiIoc"], "start_seconds": ["130", "60"], "properties": ["continuous, sneeze, speech", "water, radio, man"], "captions_pred_video": ["footage is blurry and out of focus", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sofxkNWaP0s", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["wind, engine, louder", "rooster, crow, background, men"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a clock ticktocks"], "sample_ids": ["zofjfKhqLk8", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["background, metal, clank", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a man using a 
machine to cut a piece of wood", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "wind blows as people chatter quietly"], "sample_ids": ["vzxHnu-SFEw", "xBxDz0CFVn0"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "wind, chatter, people"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a man speaks as birds chirp and a vehicle passes nearby"], "sample_ids": ["sEprKHm8Sj8", "siJFXfGWgDk"], "start_seconds": ["90", "50"], "properties": ["car, tires, slows", "a, bird, vehicle"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 
2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and birds are chirping in the background "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "a toilet flushes and a female speaks"], "sample_ids": ["vbpKkWvfOu4", "yaln9y8I7ms"], "start_seconds": ["560", "230"], "properties": ["a, woman, man", "female, flushes, toilet"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a toilet flushes and a man speaks"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a race car approaches quickly and slows down squealing tires", "someone is typing on a computer keyboard"], "sample_ids": ["sEprKHm8Sj8", "v0x1odnXtP0"], "start_seconds": ["90", "210"], "properties": ["car, tires, slows", "keyboard, type, computer"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "how to make money on youtube in spanish"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a person is typing on a keyboard"], "question": "which object is stationary", "label": 1}, {"captions": ["repeated tapping is accompanied by 
water running and a woman speaking softly", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wvKpEYswXO0", "uEU-Hg5MTN8"], "start_seconds": ["150", "27"], "properties": ["sound, water, running", "a woman, laughs, animal"], "captions_pred_video": ["of the person preparing food in the kitchen", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking softly?", "label": 0}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a person snores loudly multiple times at a close distance"], "sample_ids": ["zkKdxzNC97Y", "sSMl2vc3ek"], "start_seconds": ["27", "20"], "properties": ["loud, bang, noise", "loud, multiple, distance"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a person snoring loudly"], "question": "which entity is louder", "label": 0}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "people cheer as a vehicle engine revs"], "sample_ids": ["xNMovAf3o50", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["rain, thunder, music", "engine revs, vehicle, people"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["vveS8HT7Uog", "xjvTpk2Zpr8"], "start_seconds": ["100", "70"], "properties": ["a man, objects, speak", "wind, blows, vehicle"], 
"captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a loud engine muffles a man as he speaks"], "sample_ids": ["v7jJS8aAyA", "xyx6eNVEYRY"], "start_seconds": ["10", "380"], "properties": ["wind, blows, loudly", "loud, engine, muffles"], "captions_pred_video": [null, "footage of a helicopter landing on a runway at an airport"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "an aircraft engine is running and a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a dark barks and whimpers", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["sYj4hpDUZDQ", "sapQIQUhFc"], "start_seconds": ["30", "280"], "properties": ["barks, whimpers, dark", "liquid, flow, distance"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", null], "captions_pred_audio": ["a dog barks and a cat meows", "a man is speaking and a stream is flowing in the background "], "question": "which entity is more distant", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "an insect buzzes around continuously"], "sample_ids": ["w1mlz3Pe4fU", "v25l1jef3JY"], "start_seconds": ["300", "0"], "properties": ["vocalize, chirp, continuously", "buzzes, continuously, insect"], "captions_pred_video": ["of a bird in a cage", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["birds are chirping and singing", "a fly is buzzing around a microphone "], "question": "which entity is a type of insect", "label": 1}, {"captions": ["a clock ticktocks and sounds an 
alarm then a man laughs", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["xV7Mg1QucSc", "ukg5L09Wpvo"], "start_seconds": ["14", "150"], "properties": ["alarm, ticktocks, laughs", "clickety-clack, train, whistle"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vqZuVbG6-HI", "uEU-Hg5MTN8"], "start_seconds": ["130", "27"], "properties": ["background, male, female", "a woman, laughs, animal"], "captions_pred_video": ["footage is blurry because it's raining outside", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking and an animal snorting?", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "a stream of water flows as people talk and wind blows"], "sample_ids": ["zO-LSSY92ZM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["liquid, surface, sound", "stream, water, flow"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air 
filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", "footage is blurry and out of focus"], "captions_pred_audio": ["steam is hissing and hissing", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sLUnaPT5gM8", "sSMl2vc3ek"], "start_seconds": ["0", "20"], "properties": ["loud, laughter, intermittent", "loud, multiple, distance"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["an airplane engine spools and people speak", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wTjoRj1se3U", "vb1fPSDI4c"], "start_seconds": ["390", "30"], "properties": ["airplane, engine, spool", "multiple, people, yell"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", null], "captions_pred_audio": ["a jet engine is running and people are talking", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "winds blows roughly as a vehicle races past"], "sample_ids": ["sG7TyPnFDR0", "xjvTpk2Zpr8"], "start_seconds": ["180", "70"], "properties": ["beeps, machine, smoke alarm", "wind, blows, 
vehicle"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xfaoyyzw2WU", "yajyRTUQk3U"], "start_seconds": ["180", "400"], "properties": ["loud, jet engine, roar", "a woman, something, fried"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "- a woman cooking in the kitchen"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "sucking and grunting followed by slurping with birds in the background"], "sample_ids": ["wnpJndXuxLc", "yYEVLuqEytU"], "start_seconds": ["50", "40"], "properties": ["blows, vehicle, train", "grunt, slurp, background"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a baby goat is being petted by a person's hand"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "several sheep bleat and a man speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vZAw4apG0Es", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["background, tick, repeat", "rooster, crow, background, men"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of the sun shining through the clouds on a cloudy day"], 
"captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a mechanical buzzing getting louder"], "sample_ids": ["tDlysoZiA1I", "sEprKHm8Sj8"], "start_seconds": ["0", "90"], "properties": ["animal, grunt, chirp", "noise, loud, buzzing"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a race car accelerates and revs its engine "], "question": "which entity is a noise", "label": 1}, {"captions": ["ticking continues without interruption", "a horn rings out as a machine runs by"], "sample_ids": ["v-g-j2uTByM", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["ticking, continuous, clock", "a, horn, run"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a clock is ticking loudly", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is not continuous", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["siJFXfGWgDk", "tDVADusiIoc"], "start_seconds": ["50", "60"], "properties": ["a, bird, vehicle", "water, radio, man"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and birds are chirping in the 
background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "ducks quack continuously"], "sample_ids": ["vbr9mHKc8WM", "sNB8zxXneIM"], "start_seconds": ["40", "20"], "properties": ["noise, loudness, engine", "quack, duck, continuously"], "captions_pred_video": [null, "a group of geese in a cage"], "captions_pred_audio": ["an engine is idling", "a rooster is crowing and wind is blowing "], "question": "which entity makes noise continuously", "label": 1}, {"captions": ["an electronic device bleeps once", "a man speaks as a car is passing by"], "sample_ids": ["tHJ6JSa8Y4", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["bleeps, electronic, device", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a clock is ticking and beeping", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "some men converse over an engine running"], "sample_ids": ["sWZzXuWYY", "sCiy7QS1U"], "start_seconds": ["420", "300"], "properties": ["male, speech, banging", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a clock ticktocks briefly", "an 
insect buzzes around continuously"], "sample_ids": ["u7C-AEBQM", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["ticktocks, clock, ticktocks briefly", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a ticktock of a clock", "a fly is buzzing around a microphone "], "question": "which entity buzzes continuously", "label": 1}, {"captions": ["loud, continuous burping", "birds chirp in the background while a horse neighs followed by a girl speaking"], "sample_ids": ["y636gklDioE", "s59PfAghdkM"], "start_seconds": ["20", "0"], "properties": ["loud, continuous, burping", "bird, chirp, background, horse, neigh"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "is an anime scene featuring two people and a horse in the foreground and a fence in the background"], "captions_pred_audio": ["a person burps loudly several times", "birds are chirping a horse is neighing and a woman is speaking "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "some men converse over an engine running"], "sample_ids": ["wz7N8YRy74I", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["rooster, crow, background, people", "men, converse, engine"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", null], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a rooster in it?", "label": 0}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "frogs croak and vocalize"], "sample_ids": ["wz7N8YRy74I", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["rooster, crow, background, people", "croak, vocalize, frog"], 
"captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "a close up of a frog in the water"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a frog is croaking"], "question": "which animal is speaking", "label": 1}, {"captions": ["paper is crumpling consistently", "winds blows roughly as a vehicle races past"], "sample_ids": ["v5cSxLaHADY", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "wind, blows, vehicle"], "captions_pred_video": ["footage of the person holding a pair of scissors", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["paper is crumpled and crinkled", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["plastic is tapped on while someone speaks", "some tunes played by whistling"], "sample_ids": ["wvKpEYswXO0", "u6BnG6YZqJ4"], "start_seconds": ["150", "0"], "properties": ["plastic, tap, speak", "tune, play, whistling"], "captions_pred_video": ["of the person preparing food in the kitchen", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a person whistling a song"], "question": "which entity is played by whistling", "label": 1}, {"captions": ["an engine runs and wind blows", "people speak as gunfire rings out"], "sample_ids": ["vs65y4qmyBE", "wqTCwqVRDlk"], "start_seconds": ["340", "80"], "properties": ["engine, run, wind", "gunfire, ring, speak"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["a clock ticks quietly and 
rhythmically", "children cheer as a man speaks then an audience screams"], "sample_ids": ["u7C-AEBQM", "vJvryTwuAV8"], "start_seconds": ["30", "16"], "properties": ["ticks, rhythmic, quiet", "audience, cheer, man"], "captions_pred_video": [null, "a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and a crowd is shouting and whooping "], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "vehicles pass by on a roadway"], "sample_ids": ["y2ZBGpgbhHM", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["dog, chirp, breathe", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds chirping and a dog panting", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaking with light rustling", "a motor idles, accelerates, then slows down."], "sample_ids": ["zOZleIRqZm4", "vYkA3cfXp5Q"], "start_seconds": ["80", "30"], "properties": ["light, rustling, man", "speed, idle, accelerate"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "an engine is idling"], "question": "which entity is a machine", "label": 1}, {"captions": ["heavy rain splashes as it falls", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["wP8ZKrlx3oA", "yDoT73BWsdA"], "start_seconds": ["40", "10"], "properties": ["fall, rain, splash", "engine, revs, vehicle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a race 
car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vbZ-0lGPneg", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["a woman, a television program, a bird", "female, spraying, scream"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a woman is speaking and a baby is crying"], "question": "which entity has a woman talking?", "label": 0}, {"captions": ["a fly buzzes around loudly as birds chirp", "a vehicle engine accelerating then running on idle"], "sample_ids": ["uJV8NDaHqqk", "vYkA3cfXp5Q"], "start_seconds": ["100", "30"], "properties": ["loud, fly, chirp", "engine, accelerate, idle"], "captions_pred_video": ["a bee hive in a wooden box", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a swarm of bees buzzing around", "an engine is idling"], "question": "which entity is not a fly?", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a child speaks in closed space"], "sample_ids": ["sEprKHm8Sj8", "yW6FWLSLkx4"], "start_seconds": ["90", "40"], "properties": ["car, tires, slows", "child, space, speak"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking with background noise and breathing sounds "], "question": 
"which entity is speaking", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "wind blows strongly"], "sample_ids": ["w5W5Kqtc8E", "w8uLijTqtlU"], "start_seconds": ["100", "70"], "properties": ["water, splashes, motorboat", "wind, blows, strongly"], "captions_pred_video": [null, "footage is blurry and shaky"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "the wind is blowing strongly"], "question": "which entity is more likely to blow strongly", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "bird squawks are accompanied by a man and woman speaking"], "sample_ids": ["yVumC9TGknc", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["humming, clock, birds", "man, woman, squawks"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", null], "captions_pred_audio": ["a series of beeps and chirps", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has a clock ticking?", "label": 0}, {"captions": ["an engine sputters followed by a car zooming by", "a vehicle engine accelerating then running on idle"], "sample_ids": ["u5RmF3c3Aw", "vYkA3cfXp5Q"], "start_seconds": ["60", "30"], "properties": ["engine, car, zoom", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "material crumbles into a microphone"], "sample_ids": ["sG7TyPnFDR0", "vofpvUo6NAw"], "start_seconds": ["180", "220"], "properties": ["beeps, machine, smoke alarm", "material, crumbles, microphone"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", 
"person wrapping a toy car in a plastic bag"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "paper is being crumpled and crinkled"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "paper is crumpling consistently"], "sample_ids": ["wTideSjRFS0", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["food, sizzle, woman", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "water is sprayed across a hard surface"], "sample_ids": ["vZAw4apG0Es", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["people, clock, converse", "water, spray, surface"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a clock is ticking and people are talking", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a woman speaks as she rubs two objects together"], "sample_ids": ["v7jJS8aAyA", "vzxHnu-SFEw"], "start_seconds": ["10", "80"], "properties": ["wind, blows, loudly", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is silent", "label": 1}, {"captions": ["a woman and man are speaking", "a horn rings out as a machine runs by"], "sample_ids": ["vbpKkWvfOu4", "slZLHwNbbt4"], "start_seconds": ["560", "300"], "properties": ["two people, speaking, woman, man", "a, horn, run"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["spJCm8tD9Zo", "uEU-Hg5MTN8"], "start_seconds": ["90", "27"], "properties": ["snores, wheezes, sleeps", "a woman, laughs, animal"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a 
baby is crying"], "question": "which entity is a person", "label": 0}, {"captions": ["a power tool runs and touches a surface", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zfvPRf3chY", "tdWhHV3X25Q"], "start_seconds": ["290", "60"], "properties": ["power tool, run, touch", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a man is speaking and a crowd is clapping"], "question": "which is not a power tool", "label": 1}, {"captions": ["someone whistles a tune", "a person speaks over rustling leaves"], "sample_ids": ["sIXTftIuUgw", "zOZleIRqZm4"], "start_seconds": ["90", "80"], "properties": ["someone, tune, whistle", "rustling, leaves, person"], "captions_pred_video": [null, "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with crickets chirping in the background"], "question": "which entity is a person speaking over rustling leaves?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "a duck quacks continuously"], "sample_ids": ["wz7N8YRy74I", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, men", "quacks, continuously, duck"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a duck is quacking loudly"], "question": "which animal is speaking", "label": 1}, {"captions": ["a woman sneezes then speaks", "water is sprayed across a hard surface"], "sample_ids": ["x4dZyf9Gbj0", "sQwlkXjQabo"], "start_seconds": ["130", "10"], "properties": ["sneezes, speaks, woman", "water, spray, surface"], "captions_pred_video": 
["footage is blurry and out of focus", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman sneezes and speaks", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vlS6YMeWAPo", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["noise, bleat, call", "three men, wind, flow"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a goat bleats and birds chirp", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a goat?", "label": 0}, {"captions": ["a vehicle engine revs as the vehicle passes", "a child speaks in closed space"], "sample_ids": ["yDoT73BWsdA", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["engine, revs, vehicle", "child, space, speak"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["someone whistles briefly", "paper is crumpling consistently"], "sample_ids": ["uFoga8sHpiw", "v5cSxLaHADY"], "start_seconds": ["90", "0"], "properties": ["sound, duration, pitch", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a bird in a cage", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a person whistles a song", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "a frog croaks as other 
frogs croak in the background"], "sample_ids": ["zgUgkpk78xU", "yswmmRZFItk"], "start_seconds": ["70", "0"], "properties": ["horn, bells, ring", "background, frog, croak"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "a close up of a frog in the water"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a frog is croaking"], "question": "which entity is a warning", "label": 0}, {"captions": ["a man talks while vehicles pass by", "a woman speaks happily and an animal chirps"], "sample_ids": ["sK4u5T8hW78", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["a, man, talk", "a woman, chirps, animal"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and a dog is barking "], "question": "which entity is more active", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a bell dings followed by a loud horn blaring"], "sample_ids": ["zl9Dqx-j7q4", "tZGN5a7ybxo"], "start_seconds": ["6", "60"], "properties": ["motors rev, laugh, loudly", "a bell, a horn, a ding"], "captions_pred_video": ["footage of a man driving a car in the dark", "is taken from a moving vehicle on the train tracks"], "captions_pred_audio": ["a jet 
engine roars ", "a train is moving and blowing its horn "], "question": "which entity is louder", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sNB8zxXneIM", "uZesmtKZGSw"], "start_seconds": ["20", "250"], "properties": ["several, quack, cocks", "men, talk, cars"], "captions_pred_video": ["a group of geese in a cage", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a stream of water runs briefly", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["x-PeY8Yb8M4", "zl9Dqx-j7q4"], "start_seconds": ["300", "6"], "properties": ["stream, water, run", "engine, laugh, loud"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a car is driving on a wet road ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["ukg5L09Wpvo", "yks4cLgIDMc"], "start_seconds": ["150", "170"], "properties": ["clickety-clack, train, whistle", 
"background, speaking, child"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background?", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "a woman speaks happily and an animal chirps"], "sample_ids": ["v5P-ThUCINM", "uWAAAL4CIoc"], "start_seconds": ["400", "0"], "properties": ["background, chirp, bird", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and birds are chirping", "a woman is speaking and a dog is barking "], "question": "which entity has a chirpy animal?", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wwyfGO2J4", "zj2R0XoFr5k"], "start_seconds": ["90", "50"], "properties": ["people, applaud, hoot", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a toilet flushes and water drains"], "sample_ids": ["vSeGhaZt-aI", "sfAvvZwdLCY"], "start_seconds": ["50", "20"], "properties": ["water, sink, talk", "water drains, flushes, water"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a toilet is flushed"], "question": "which entity is draining water", "label": 1}, 
{"captions": ["a stream of water flows as people talk and wind blows", "an infant crying as a woman laughs"], "sample_ids": ["xBxDz0CFVn0", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["stream, water, flow", "a, laugh, infant"], "captions_pred_video": ["footage is blurry and out of focus", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "wind blowing followed by a zoom"], "sample_ids": ["vs65y4qmyBE", "vr8ZXjEBhMQ"], "start_seconds": ["340", "150"], "properties": ["engine, run, man", "wind, blow, zoom"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a dog barks and whimpers", "pigeons vocalize and birds chirp"], "sample_ids": ["sShpyu2l4YQ", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["barks, whimpers, dog", "vocalize, bird, chirp"], "captions_pred_video": ["the puppies are playing with a toy", "of the pigeon in the cage"], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "an infant crying as a woman laughs"], "sample_ids": ["zhx6hoYrHeI", "xhmRY9yhC7c"], "start_seconds": ["160", "20"], "properties": ["engine, sputter, rough", "a, laugh, infant"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a baby cries and a woman speaks"], "question": "which entity is a 
person", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "a clock ticktocks"], "sample_ids": ["vzceMbklWc", "v-g-j2uTByM"], "start_seconds": ["180", "30"], "properties": ["water, faucet, sink", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["water is running and a man is speaking", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sShpyu2l4YQ", "sSMl2vc3ek"], "start_seconds": ["0", "20"], "properties": ["growl, bark, yip", "loud, multiple, distance"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["people speak softly as food sizzles", "people cheer as a vehicle engine revs"], "sample_ids": ["yhQ2Lg-7qDY", "xjhAnI2q6hM"], "start_seconds": ["130", "6"], "properties": ["food, sizzle, speak", "engine revs, vehicle, people"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "water flows and trickles"], "sample_ids": ["tDVADusiIoc", "tB7hWb9gTuQ"], "start_seconds": ["60", "30"], "properties": ["water, radio, man", "water, flow, trickle"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "the rocks on the beach are surrounded by water and the sky is visible in the background"], 
"captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["uKCSGgof8gI", "ukg5L09Wpvo"], "start_seconds": ["12", "150"], "properties": ["chirps, distance, signal", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["food is frying while a woman speaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yhQ2Lg-7qDY", "zFjIWfSD-4"], "start_seconds": ["130", "410"], "properties": ["food, woman, speak", "People, motor, brakes"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a woman sneezes then speaks", "an infant crying frantically"], "sample_ids": ["x4dZyf9Gbj0", "zwOBqeFTgiU"], "start_seconds": ["130", "30"], "properties": ["sneezes, speaks, woman", "cry, infant, frantically"], "captions_pred_video": ["footage is blurry and out of focus", "of the baby crying in the car seat"], "captions_pred_audio": ["a woman sneezes and speaks", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a man speaks followed by another man speaking outside"], "sample_ids": ["w5W5Kqtc8E", 
"viuTg1M-dqg"], "start_seconds": ["100", "30"], "properties": ["water, splashes, motorboat", "two men, speak, follow"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two men speaking?", "label": 1}, {"captions": ["a motorcycle engine is idling", "a woman speaks happily and an animal chirps"], "sample_ids": ["vZAqdHZ81yA", "uWAAAL4CIoc"], "start_seconds": ["180", "0"], "properties": ["engine, motorcycle, idling", "a woman, chirps, animal"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", null], "captions_pred_audio": ["an engine is idling loudly", "a woman is speaking and a dog is barking "], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "continuous snoring"], "sample_ids": ["tOSWIURC-4", "sLkeqCDJIyw"], "start_seconds": ["0", "120"], "properties": ["engine, work, nearby", "loud, snoring, noise"], "captions_pred_video": [null, ", what is the man doing on the couch? 
sleeping"], "captions_pred_audio": ["a lawn mower is running ", "a person is snoring loudly"], "question": "which entity makes a loud noise", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "several insects fly while two men talk"], "sample_ids": ["u21-Z5gJCB8", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["background, voice, man", "several, fly, men"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a helicopter engine runs continuously", "an engine runs loudly"], "sample_ids": ["ugHJF0hfYkg", "vqZuVbG6-HI"], "start_seconds": ["10", "130"], "properties": ["engine, running, continuously", "loud, engine, run"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a helicopter is flying overhead ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a car speeding up in the distance"], "sample_ids": ["xZepNM9qcRA", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["background, motor, run", "distance, car, speed"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["several ducks quack and cocks crow far away", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["sNB8zxXneIM", "w5W5Kqtc8E"], "start_seconds": ["20", 
"100"], "properties": ["several, quack, cocks", "wind, blow, vehicle"], "captions_pred_video": ["a group of geese in a cage", null], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "winds blows roughly as a vehicle races past"], "sample_ids": ["smGI3C1NZc", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["water, drain, toilet", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a toilet is flushed", "a jet engine roars and wind blows "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a person is whistling", "a helicopter engine runs continuously"], "sample_ids": ["sIXTftIuUgw", "ugHJF0hfYkg"], "start_seconds": ["90", "10"], "properties": ["person, whistling, person", "engine, running, continuously"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a person whistling a song", "a helicopter is flying overhead "], "question": "which entity is not whistling", "label": 1}, {"captions": ["people clap and speak in the distance", "a man speaks as a motor runs in the background"], "sample_ids": ["wwyfGO2J4", "xZepNM9qcRA"], "start_seconds": ["90", "30"], "properties": ["clap, distance, speak", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "some liquid flows while a woman laughs and man talks"], 
"sample_ids": ["uKCSGgof8gI", "vddP56-ogds"], "start_seconds": ["12", "30"], "properties": ["chirps, distance, signal", "liquid, laughs, man"], "captions_pred_video": ["footage of a street in a small town on a sunny day", null], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "water is running and gurgling and a man is speaking"], "question": "which entity is more likely to be a video of a woman laughing?", "label": 1}, {"captions": ["water flows as men speak and yell", "some tunes played by whistling"], "sample_ids": ["vJ7JPEFhyLA", "u6BnG6YZqJ4"], "start_seconds": ["16", "0"], "properties": ["water, flow, men", "tune, play, whistling"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a dark barks and whimpers", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sYj4hpDUZDQ", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["barks, whimpers, dark", "airplane, boy, fly"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a dog barks and a cat meows", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a person is burping then speaks and laughs"], "sample_ids": ["wEBlkGWVWwE", "wAAkbZToh8"], "start_seconds": ["260", "0"], "properties": ["a, babble, woman", "burp, laugh, speak"], "captions_pred_video": ["shows a person writing on the whiteboard", null], "captions_pred_audio": ["a woman is speaking and a child is speaking 
with background noise and clapping ", "a man burps and a woman speaks"], "question": "which entity is speaking", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "a woman speaks as she rubs two objects together"], "sample_ids": ["wSVhSdj0F0", "vzxHnu-SFEw"], "start_seconds": ["10", "80"], "properties": ["beep, clang, footsteps", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "some liquid flows while a woman laughs and man talks"], "sample_ids": ["tgbONvsP47Y", "vddP56-ogds"], "start_seconds": ["0", "30"], "properties": ["noise, truck, accelerate", "liquid, laughs, man"], "captions_pred_video": ["footage of a fire truck entering a garage", null], "captions_pred_audio": ["a car is driving on the road ", "water is running and gurgling and a man 
is speaking"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a stream runs then someone speaks", "a child speaks in closed space"], "sample_ids": ["wbHTKEJZyhc", "yW6FWLSLkx4"], "start_seconds": ["20", "40"], "properties": ["stream, run, someone", "child, space, speak"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["y2bVZ7rz-5M", "wz7N8YRy74I"], "start_seconds": ["280", "30"], "properties": ["engine, horn, 
siren", "rooster, crow, background, men"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "water flows as men speak and yell"], "sample_ids": ["ukg5L09Wpvo", "vJ7JPEFhyLA"], "start_seconds": ["150", "16"], "properties": ["a train, a horn, a bell", "water, flow, men"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be in motion", "label": 0}, {"captions": ["a woman speaks and then a man speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["vbpKkWvfOu4", "wwyfGO2J4"], "start_seconds": ["560", "90"], "properties": ["a, man, speaks", "people, applaud, hoot"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a horn rings out as a machine runs by"], "sample_ids": ["vqZuVbG6-HI", "slZLHwNbbt4"], "start_seconds": ["130", "300"], "properties": ["background, male, female", "a, horn, run"], 
"captions_pred_video": ["footage is blurry because it's raining outside", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity has a horn", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a man talks while metallic objects are rapped and steam is released"], "sample_ids": ["vf9xf3vMsGM", "vuUVPzd2FXw"], "start_seconds": ["540", "160"], "properties": ["A man speaks while turning a water faucet on.", "a, steam, release"], "captions_pred_video": ["of the person washing their hands under the faucet", "of the person cooking on the grill with a spatula"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a man is speaking and dishes are clanging"], "question": "which entity is about water?", "label": 0}, {"captions": ["a vehicle accelerates and squeals tires", "a child speaks in closed space"], "sample_ids": ["yRx9txMcBl0", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["accelerates, tires, squeals", "child, space, speak"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a woman is speaking with background noise and breathing sounds 
"], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a music is played followed by a frog croaking and then music is played again"], "sample_ids": ["vh30P49Po6s", "voJh2gJxXhA"], "start_seconds": ["30", "50"], "properties": ["loud, continuous, quacks", "music, frog, croak"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a frog on a black background with a red diamond in the center"], "captions_pred_audio": ["a duck is quacking loudly", "music is playing and crickets are chirping "], "question": "which entity is quieter", "label": 1}, {"captions": ["a door opens and birds chirp", "a car accelerates and wind blows"], "sample_ids": ["yeFvk9x0wWI", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["door, open, birds", "accelerates, wind, blows"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", null], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["u21-Z5gJCB8", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["background, voice, man", "loud, laughter, intermittent"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a small engine idles continuously"], "sample_ids": ["ukxt9I7eMMg", "y5WII6cTH7k"], "start_seconds": ["30", "40"], "properties": ["continuous, woman, speaking", 
"engine, idle, continuously"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of a sewing machine stitching a red and white hat"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "an engine is knocking and vibrating "], "question": "which entity is a video of a woman speaking?", "label": 0}, {"captions": ["water running down a sink while a man is talking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vSeGhaZt-aI", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["water, sink, talk", "stream, water, flow"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is flowing", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a car accelerates and wind blows"], "sample_ids": ["vXlk0lIQBFo", "u0TrcHhkPQ"], "start_seconds": ["470", "20"], "properties": ["wind, speak, vocalize", "accelerates, wind, blows"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", null], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a person sniffles and sneezes", "a man speaks as a motor runs in the background"], "sample_ids": ["uRlbY6aoBU", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["sneezes, sniffles, person", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is sneezing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, 
{"captions": ["a man talks as something metal hits against and glass is set down", "water runs into a sink while men speak"], "sample_ids": ["x6ijhqRY38s", "vzceMbklWc"], "start_seconds": ["250", "180"], "properties": ["something metal, glass, hit", "water, sink, run"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "water is running and a man is speaking"], "question": "which entity is a video of water running into a sink?", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "a child speaks in closed space"], "sample_ids": ["voJh2gJxXhA", "yW6FWLSLkx4"], "start_seconds": ["50", "40"], "properties": ["music, frog, croak", "child, space, speak"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a machine beeps continuously", "a car accelerates and wind blows"], "sample_ids": ["y682ml90jGw", "u0TrcHhkPQ"], "start_seconds": ["11", "20"], "properties": ["beeps, machine, continuously", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a train engine runs and a horn blows"], "sample_ids": ["su6FAOcOA8c", "zPX9o1uDiI"], "start_seconds": ["4", "40"], "properties": ["engine, run, woman", "engine, horn, run"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", null], "captions_pred_audio": 
["a woman is speaking and a subway train is moving ", "a train moves with its horn blowing and wheels squealing "], "question": "which entity has a horn", "label": 1}, {"captions": ["birds chirp as a train approaches", "an infant crying frantically"], "sample_ids": ["xM4joTqDVp4", "zwOBqeFTgiU"], "start_seconds": ["160", "30"], "properties": ["bird, chirp, train", "cry, infant, frantically"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "of the baby crying in the car seat"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a horn honks and then loudly blares", "waves crash against a shoreline and people speak"], "sample_ids": ["wnpJndXuxLc", "yFB25fqfU8I"], "start_seconds": ["50", "300"], "properties": ["horn, honk, loud", "wave, crash, shoreline"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more quiet", "label": 1}, {"captions": ["running water in a faucet with some clinks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zNRChLjqcU", "vfYTJq7nU"], "start_seconds": ["220", "130"], "properties": ["water, faucet, run", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running from a faucet into a sink", "a duck quacks and a woman speaks"], "question": "which entity is about water?", "label": 0}, {"captions": ["people speak in a closed space", "a man speaks as a car is passing by"], "sample_ids": ["sTpirNYo8vQ", "sK4u5T8hW78"], 
"start_seconds": ["30", "30"], "properties": ["people, space, speak", "a, car, pass"], "captions_pred_video": ["of a man taking a selfie on a bus", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more likely to be in a closed space", "label": 0}, {"captions": ["dogs barking and whimpering", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tIY7qOV3rEM", "xfaoyyzw2WU"], "start_seconds": ["0", "180"], "properties": ["barking, whimpering, dog", "loud, jet engine, roar"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["uKCSGgof8gI", "vbZ-0lGPneg"], "start_seconds": ["12", "30"], "properties": ["chirps, distance, signal", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": 
["someone is typing on a computer keyboard", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["v0x1odnXtP0", "yajyRTUQk3U"], "start_seconds": ["210", "400"], "properties": ["keyboard, type, computer", "a woman, something, fried"], "captions_pred_video": ["how to make money on youtube in spanish", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman is speaking while food is frying in the background"], "question": "which entity is a demonstration of cooking?", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "people cheer as a vehicle engine revs"], "sample_ids": ["vcmWSmvti8", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["music, man, fire", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zofjfKhqLk8", "tiDFTC-5vU"], "start_seconds": ["10", "30"], "properties": ["background, metal, clank", "male, duck, laugh"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "race cars go around a track as a man commentates"], "sample_ids": ["wz7N8YRy74I", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["rooster, crow, background, 
people", "car, track, man"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["several beeps are followed by a hit and a woman talking", "paper folding and crinkling"], "sample_ids": ["w34HjHr6gAY", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["beeps, hit, woman", "paper, fold, crinkle"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube 
how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of folding and crinkling paper?", "label": 1}, {"captions": ["a person sniffs and sneezes", "waves crash against a shoreline and people speak"], "sample_ids": ["uRlbY6aoBU", "yFB25fqfU8I"], "start_seconds": ["0", "300"], "properties": ["sneezes, person, sniffs", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "small dogs yip and bark sharply"], "sample_ids": ["uZesmtKZGSw", "v-wcQf4BDY0"], "start_seconds": ["250", "120"], "properties": ["men, talk, cars", "bark, yip, sharply"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet 
holden monaro gts 1970 chevrolet", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "a infant makes noise and is excited"], "sample_ids": ["sZvwOuuPGP0", "wIJK3-5y0kA"], "start_seconds": ["50", "30"], "properties": ["engine, diesel, truck", "noise, excited, infant"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a medium engine is running ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a woman sneezes then speaks", "small dogs yip and bark sharply"], "sample_ids": ["x4dZyf9Gbj0", "v-wcQf4BDY0"], "start_seconds": ["130", "120"], "properties": ["sneezes, speaks, woman", "bark, yip, sharply"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman sneezes and speaks", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman talking as an infant is crying", "a clock ticktocks"], "sample_ids": ["tMbMDvT50j8", "v-g-j2uTByM"], "start_seconds": ["12", "30"], "properties": ["a, talk, infant", "ticktocks, clock, ticktocks"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a baby cries and a woman speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a bus engine idles while a woman speaks making an announcement"], 
"sample_ids": ["ugHJF0hfYkg", "su6FAOcOA8c"], "start_seconds": ["10", "4"], "properties": ["loud, intense, propeller", "engine, idle, woman"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a subway train is moving "], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["yajyRTUQk3U", "y2bVZ7rz-5M"], "start_seconds": ["400", "280"], "properties": ["noise, woman, speak", "motor noise, horn, siren"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a truck is honking its horn and a siren is blaring "], "question": "which noise is more ominous", "label": 1}, {"captions": ["an engine runs and a man speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["yT5WfYMRr-U", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["engine, run, man", "multiple, people, yell"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["humming and rattling of an engine idling as it revs", "wind blows as people chatter quietly"], "sample_ids": ["xMXvkIcaG0Y", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["sound, humming, rattling", "wind, chatter, people"], "captions_pred_video": ["footage of a car's hood being opened up to reveal the engine underneath the hood", "footage is blurry and out of focus"], "captions_pred_audio": ["an engine 
is revving and accelerating ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a duck quacks continuously", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vh30P49Po6s", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["quacks, continuously, duck", "stream, water, flow"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage is blurry and out of focus"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wSVhSdj0F0", "tDVADusiIoc"], "start_seconds": ["10", "60"], "properties": ["horn honks, keys jingle, slam", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tK4VlLsNxak", "sSMl2vc3ek"], "start_seconds": ["120", "20"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "loud, multiple, distance"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a toilet flushes and water drains", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sfAvvZwdLCY", 
"wz7N8YRy74I"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "rooster, crow, background, men"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "the revving of an engine throttle followed by a man speaking"], "sample_ids": ["zFjIWfSD-4", "tezvROoo4bs"], "start_seconds": ["410", "40"], "properties": ["People, motor, brakes", "audio, throttle, speaking"], "captions_pred_video": [null, "footage of a busy city street with cars parked on both sides of the road"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a car accelerates and revs while a man speaks "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a frog croaks as other frogs croak in the background"], "sample_ids": ["yYEVLuqEytU", "yswmmRZFItk"], "start_seconds": ["40", "0"], "properties": ["animal, pig, background", "background, frog, croak"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "a close up of a frog in the water"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a frog is croaking"], "question": "which entity has a frog in the background?", "label": 1}, {"captions": ["continuous snoring", "paper folding and crinkling"], "sample_ids": ["sLkeqCDJIyw", "zPpG3RD8lSs"], "start_seconds": ["120", "20"], "properties": ["loud, snoring, noise", "paper, fold, crinkle"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a person is snoring loudly", "the wind blows and a mouse clicks "], "question": "which entity is quieter", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a airplane flies overhead as a woman speaks"], "sample_ids": ["tDlysoZiA1I", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["animal, grunt, chirp", "airplane, fly, woman"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["ylpYOorfH4o", "uEU-Hg5MTN8"], "start_seconds": ["410", "27"], "properties": ["engine, running, wind", "a woman, laughs, animal"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 
youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["zY3icUyMdh8", "yDoT73BWsdA"], "start_seconds": ["20", "10"], "properties": ["dog, bark, engine", "engine, revs, vehicle"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "dishes cling together then a man begins to speak"], "sample_ids": ["xfaoyyzw2WU", "sQGXqGcwOTc"], "start_seconds": ["180", "3"], "properties": ["loud, jet engine, roar", "cling, speak, dishes"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "mechanisms are operating and water is splashing "], "question": "which entity is quieter", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "ticking continues 
without interruption"], "sample_ids": ["tQWGZLItBXk", "v-g-j2uTByM"], "start_seconds": ["170", "30"], "properties": ["voice, music, whoosh", "ticking, continuous, clock"], "captions_pred_video": ["worms revolution screenshots", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "wind blowing followed by a zoom"], "sample_ids": ["tQWGZLItBXk", "vr8ZXjEBhMQ"], "start_seconds": ["170", "150"], "properties": ["music, person, ding", "wind, blow, zoom"], "captions_pred_video": ["worms revolution screenshots", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sQGXqGcwOTc", "sLUnaPT5gM8"], "start_seconds": ["3", "0"], "properties": ["audio, kid, giggles", "loud, laughter, intermittent"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more like a scream", "label": 1}, {"captions": ["someone snores nearby", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["spJCm8tD9Zo", "wqZ135Ssz0"], "start_seconds": ["90", "60"], "properties": ["someone snores, nearby, someone", "two men, woman, birds"], "captions_pred_video": ["of a man laying on the ground 
with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "water pouring and bubbling"], "sample_ids": ["sWZzXuWYY", "uyRfq-jKPpo"], "start_seconds": ["420", "50"], "properties": ["male, clanks, thumps", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "food is frying while a woman speaks"], "sample_ids": ["zcDwZ6W7E3E", "yhQ2Lg-7qDY"], "start_seconds": ["180", "130"], "properties": ["man, speak, motorcycles", "food, woman, speak"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a faucet is running and a man is speaking"], "question": "which entity is a video of a man speaking?", 
"label": 0}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "roadway noise occurs and a truck accelerates"], "sample_ids": ["su6FAOcOA8c", "tgbONvsP47Y"], "start_seconds": ["4", "0"], "properties": ["engine, run, woman", "noise, truck, accelerate"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "dishes cling together then a man begins to speak"], "sample_ids": ["yNtRmrn0io8", "sQGXqGcwOTc"], "start_seconds": ["210", "3"], "properties": ["storm, distance, strike", "cling, speak, dishes"], "captions_pred_video": ["footage of a house in the middle of the night", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["rain falls and thunder roars", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["people speak in a closed space", "a man speaks followed by another man speaking outside"], "sample_ids": ["sTpirNYo8vQ", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["people, space, speak", "two men, speak, follow"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two men speaking?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "someone snores nearby"], "sample_ids": ["vimzuGQvdcU", "spJCm8tD9Zo"], "start_seconds": ["30", "90"], "properties": ["a, man, yells", "someone snores, nearby, someone"], "captions_pred_video": ["a group 
of people are rafting down a river", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a person is snoring loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "water flows and trickles"], "sample_ids": ["tDlfY3nmx1A", "tB7hWb9gTuQ"], "start_seconds": ["160", "30"], "properties": ["applause, laugh, man", "water, flow, trickle"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ugHJF0hfYkg", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["loud, propeller, move", "female, spraying, scream"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "people speak as gunfire rings out"], "sample_ids": ["uWAAAL4CIoc", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["a woman, chirps, animal", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["a vehicle engine runs and someone speaks", "water pouring 
and bubbling"], "sample_ids": ["zF8yoL0rkbI", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["engine, run, someone", "water, bubbles, pouring"], "captions_pred_video": ["footage of the traffic on the street at night", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a baby laughs giddily and a woman laughs then speaks"], "sample_ids": ["zY3icUyMdh8", "wjsXBsc7M40"], "start_seconds": ["20", "10"], "properties": ["dog, bark, engine", "a baby laughs, a woman laughs, a woman speaks"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of the baby playing with a toothbrush"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a baby laughs and a woman speaks"], "question": "which entity is more playful", "label": 1}, {"captions": ["a clock ticktocks briefly", "wind blows and a vehicle blows a hard then a train blows a horn"], "sample_ids": ["u7C-AEBQM", "wnpJndXuxLc"], "start_seconds": ["30", "50"], "properties": ["ticktocks, clock, ticktocks briefly", "blows, vehicle, train"], 
"captions_pred_video": [null, "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a ticktock of a clock", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is not a clock?", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["w1mlz3Pe4fU", "ukg5L09Wpvo"], "start_seconds": ["300", "150"], "properties": ["vocalize, chirp, continuously", "clickety-clack, train, whistle"], "captions_pred_video": ["of a bird in a cage", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["birds are chirping and singing", "a train blows its whistle and blows its horn "], "question": "which entity is continuous", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a clock ticktocks"], "sample_ids": ["yajyRTUQk3U", "v-g-j2uTByM"], "start_seconds": ["400", "30"], "properties": ["a woman, something, fried", "ticktocks, clock, ticktocks"], "captions_pred_video": ["- a woman cooking in the kitchen", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a man laughs and speaks as cats purr and hiss"], "sample_ids": ["un9VQlzgZM", "vVhthZ45k3Y"], "start_seconds": ["5", "30"], "properties": ["wind, speak, laugh", "cat, purr, hiss"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a man is speaking and a cat is meowing"], "question": "which entity is more likely to be a recording", "label": 1}, {"captions": ["continuous sizzling with a woman speaking 
towards the end", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["ukxt9I7eMMg", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["continuous, woman, speaking", "water, radio, man"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sEprKHm8Sj8", "su6FAOcOA8c"], "start_seconds": ["90", "4"], "properties": ["car, tires, slows", "engine, idle, woman"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["wind noise makes sound into a microphone", "water flows and trickles"], "sample_ids": ["w8uLijTqtlU", "tB7hWb9gTuQ"], "start_seconds": ["70", "30"], "properties": ["wind, microphone, noise", "water, flow, trickle"], "captions_pred_video": ["footage is blurry and shaky", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["the wind is blowing strongly", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a 
toilet flushes and a female speaks"], "sample_ids": ["sa6TLVbooCc", "yaln9y8I7ms"], "start_seconds": ["240", "230"], "properties": ["people, laugh, child", "female, flushes, toilet"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "someone is typing on a computer keyboard"], "sample_ids": ["v5P-ThUCINM", "v0x1odnXtP0"], "start_seconds": ["400", "210"], "properties": ["background, chirp, bird", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["wSVhSdj0F0", "w34HjHr6gAY"], "start_seconds": ["10", "30"], "properties": ["beep, clang, footsteps", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a beep sounds followed by a child speaking"], "question": "which entity has 
a woman talking?", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["tiDFTC-5vU", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["male, duck, laugh", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a beep sounds followed by a child speaking"], "question": "which entity has a duck quacking?", "label": 0}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sZPuqDgX2V0", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["engine, accelerate, intercom", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a vehicle?", "label": 0}, {"captions": ["men speak and a nozzle sprays liquid", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["wRV8yMk886E", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["liquid, spray, nozzle", "a woman, a television program, a bird"], "captions_pred_video": ["two cars are parked in a parking lot at night", "of a man holding a baby duck in 
his hands"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "vehicles pass by on a roadway"], "sample_ids": ["vimzuGQvdcU", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["a, man, yells", "pass, vehicle, roadway"], "captions_pred_video": ["a group of people are rafting down a river", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a car is driving on the road "], "question": "which entity is more passive", "label": 1}, {"captions": ["an engine runs and wind blows", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vs65y4qmyBE", "tdWhHV3X25Q"], "start_seconds": ["340", "60"], "properties": ["engine, run, wind", "applause, audience, yells"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["an engine revs and a turning noise is made", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tOSWIURC-4", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["noise, engine, revs", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a lawn mower is running ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["food is frying while a woman speaks", "small dogs yip and bark sharply"], "sample_ids": ["yhQ2Lg-7qDY", "v-wcQf4BDY0"], "start_seconds": ["130", "120"], "properties": ["food, woman, 
speak", "bark, yip, sharply"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["sAam2NqGhLY", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["snoring, breathing, child", "animal, grunts, snorts"], "captions_pred_video": ["of a little girl sleeping on a couch", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person is snoring", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "water pouring and bubbling"], "sample_ids": ["xNMovAf3o50", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["rain, thunder, music", "water, bubbles, pouring"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["thunder and rain with music 
playing in the background ", "water is running from a faucet"], "question": "which entity is more likely to be a natural occurrence", "label": 0}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "someone whistles a tune"], "sample_ids": ["yNtRmrn0io8", "sIXTftIuUgw"], "start_seconds": ["210", "90"], "properties": ["storm, distance, strike", "someone, tune, whistle"], "captions_pred_video": ["footage of a house in the middle of the night", null], "captions_pred_audio": ["rain falls and thunder roars", "a person whistling a song"], "question": "which is not a musical instrument", "label": 0}, {"captions": ["long loud burping by a man", "music plays followed by gunshots and then an explosion"], "sample_ids": ["xmiUIOhtZyQ", "xKB8O8LTs6s"], "start_seconds": ["60", "70"], "properties": ["loud, burp, man", "music, gunshots, explosion"], "captions_pred_video": ["homer simpson drinking a beer", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a person burps and music plays in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "a door opens and closes"], "sample_ids": ["uC9dtII1KDI", "vBHyYJ8pL0"], "start_seconds": ["150", "2"], "properties": ["wind, gusts, distance", "open, close, door"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", null], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which door is more likely to open and close", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "wind blows as people chatter quietly"], "sample_ids": ["yZp6xizR0yU", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["animal, bleat, cry", "wind, chatter, 
people"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["ul60S8TXDA8", "ziUT9IFTkjg"], "start_seconds": ["60", "10"], "properties": ["sound, distance, bell", "background, birds, rustling"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", null], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "birds are chirping and a chime is ringing "], "question": "which entity has a bird in the background?", "label": 1}, {"captions": ["birds chirp as a bell rings", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["ziUT9IFTkjg", "xKB8O8LTs6s"], "start_seconds": ["10", "70"], "properties": ["chirp, bell, ring", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "water pouring and bubbling"], "sample_ids": ["vlS6YMeWAPo", "uyRfq-jKPpo"], "start_seconds": ["40", "50"], "properties": ["sheep, baa, birds", "water, bubbles, pouring"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a goat bleats and birds chirp", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["food is frying then a woman speaks", "water rushes and then a vehicle zooms past"], "sample_ids": ["ukxt9I7eMMg", "s4Uz1Ffgo04"], "start_seconds": ["30", "100"], "properties": ["food, woman, speak", "water, rushes, vehicle"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is more active", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vlJS7LN2XyM", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["background, clocks, ticking", "engine, laugh, loud"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a ticktock of a clock", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "a man talks as several small 
engines run"], "sample_ids": ["wDVMhEdTiVw", "u9A6VZQCZpU"], "start_seconds": ["30", "30"], "properties": ["gun, shoot, water", "a, man, talk"], "captions_pred_video": ["a blurry image of trees and water in the forest", null], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a man is speaking while a race car is revving and accelerating "], "question": "which entity is talking", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a woman speaks happily and an animal chirps"], "sample_ids": ["w6RTHR6AeAg", "uWAAAL4CIoc"], "start_seconds": ["40", "0"], "properties": ["call, owl, screech", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a woman is speaking and a dog is barking "], "question": "which entity is a bird?", "label": 0}, {"captions": ["multiple birds chirp and an animal grunts", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["tDlysoZiA1I", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["animal, grunt, multiple", "a, scream, girl"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking and a baby is crying"], "question": "which entity is a human", "label": 1}, {"captions": ["dogs barking and whimpering", "vehicles pass by on a roadway"], "sample_ids": ["tIY7qOV3rEM", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["barking, whimpering, dog", "pass, vehicle, roadway"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a car is driving on the road "], "question": "which entity is more passive", "label": 1}, {"captions": ["a person burps 
loudly for a long time nearby", "a duck quacks loudly and continuously"], "sample_ids": ["vf44CgrjT0A", "vh30P49Po6s"], "start_seconds": ["20", "30"], "properties": ["loud, long, person", "loud, continuous, quacks"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a loud burp", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["ukg5L09Wpvo", "ukg5L09Wpvo"], "start_seconds": ["150", "150"], "properties": ["clickety-clack, train, whistle", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a train blows its whistle and blows its horn "], "question": "which train is going faster", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a man speaks as a motor runs in the background"], "sample_ids": ["xC8kbrKJmco", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["background, goat, scream", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a goat is bleating ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a dark barks and whimpers", "water flows and trickles"], "sample_ids": ["sYj4hpDUZDQ", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["barks, whimpers, dark", "water, flow, trickle"], "captions_pred_video": ["a brown and 
white dog standing in front of a wall with its mouth open", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a dog barks and a cat meows", "water is splashing and gurgling"], "question": "which entity is more silent", "label": 1}, {"captions": ["birds coo incessantly", "a man speaks as a car is passing by"], "sample_ids": ["yZrFNS7GFBQ", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["coo, bird, incessant", "a, car, pass"], "captions_pred_video": ["of the bird in the cage", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "someone whistles a tune"], "sample_ids": ["tIY7qOV3rEM", "sIXTftIuUgw"], "start_seconds": ["0", "90"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "someone, tune, whistle"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a person whistling a song"], "question": "which entity is a human activity", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a man speaks as a car is passing by"], "sample_ids": ["spYNpeN7rPY", "sK4u5T8hW78"], "start_seconds": ["1", "30"], "properties": ["a clock, ticktock, man", "a, car, pass"], "captions_pred_video": ["in 10 words or less what is the name of the song in the 
maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a clock?", "label": 0}, {"captions": ["a woman talks while something is fried and objects are tapped", "people cheer as a vehicle engine revs"], "sample_ids": ["yajyRTUQk3U", "xjhAnI2q6hM"], "start_seconds": ["400", "6"], "properties": ["a woman, something, fried", "engine revs, vehicle, people"], "captions_pred_video": ["- a woman cooking in the kitchen", "a school bus decorated with christmas lights is floating in the 
water"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a person snoring several times", "an infant crying as a woman laughs"], "sample_ids": ["spJCm8tD9Zo", "xhmRY9yhC7c"], "start_seconds": ["90", "20"], "properties": ["snore, person, several", "a, laugh, infant"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a person is snoring loudly", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["zALy31PjDl0", "ukg5L09Wpvo"], "start_seconds": ["21", "150"], "properties": ["a man, a vehicle, a horn", "clickety-clack, train, whistle"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a woman speaks as she rubs two objects together"], "sample_ids": ["weDbePuc-Xc", "vzxHnu-SFEw"], "start_seconds": ["40", "80"], "properties": ["cartoon character, music, vocalize", "two objects, woman, speak"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "a baby coos and fidgets as a lady speaks and laughs"], "sample_ids": ["yJ0TePmaOo", "uPDn2BFTHk"], "start_seconds": ["390", "140"], "properties": ["two hard objects, man, speak", "lady, laugh, baby"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a baby laughs and a woman speaks"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a small engine idles continuously", "a clock ticktocks"], "sample_ids": ["y5WII6cTH7k", "v-g-j2uTByM"], "start_seconds": ["40", "30"], "properties": ["engine, idle, continuously", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a clock is ticking loudly"], "question": "which entity is ticking continuously", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "three men talk while wind blows and 
some liquid flows"], "sample_ids": ["w5W5Kqtc8E", "vJ7JPEFhyLA"], "start_seconds": ["100", "16"], "properties": ["wind, engine, scream", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a vehicle?", "label": 0}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "birds chirp as a man speaks and a younger person speaks"], "sample_ids": ["s4Uz1Ffgo04", "xl2PIWyXaM"], "start_seconds": ["100", "160"], "properties": ["water, rushes, motorcycle", "chirp, man, younger person"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "birds are chirping and people are talking"], "question": "which entity is more calm", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "paper is crumpling consistently"], "sample_ids": ["tK4VlLsNxak", "v5cSxLaHADY"], "start_seconds": ["120", "0"], "properties": ["a, dial, telephone", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["goats bleat and metal clings", "people applaud and hoot and chat quietly"], "sample_ids": ["tH17JPjDPnc", "wwyfGO2J4"], "start_seconds": ["260", "90"], "properties": ["bleat, metal, clings", "people, applaud, hoot"], "captions_pred_video": ["feed of the goats eating hay in the barn", null], "captions_pred_audio": ["a cow is mooing and mechanisms are 
ticking ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "someone whistles a tune"], "sample_ids": ["yZp6xizR0yU", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["animal, bleat, cry", "someone, tune, whistle"], "captions_pred_video": ["footage of a woman feeding goats in a barn", null], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a beep repeats multiple times", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["y682ml90jGw", "vbZ-0lGPneg"], "start_seconds": ["11", "30"], "properties": ["beep, repeat, multiple", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a beeping sound is being made ", "a woman is speaking and a dog is whimpering"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "water runs from a faucet while some men speak and the water runs in the sink"], "sample_ids": ["zO-LSSY92ZM", "vzceMbklWc"], "start_seconds": ["30", "180"], "properties": ["liquid, surface, sound", "water, faucet, sink"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube 
how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", null], "captions_pred_audio": ["steam is hissing and hissing", "water is running and a man is speaking"], "question": "which entity is a source of water", "label": 1}, {"captions": ["an engine runs and a man speaks", "an airplane engine runs"], "sample_ids": ["yT5WfYMRr-U", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["engine, run, man", "engine, airplane, runs"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a car is driving by on the road "], "question": "which entity has a man speaking while an engine runs?", "label": 0}, {"captions": ["a horn rings out as a machine runs by", "a machine beeps continuously"], "sample_ids": ["slZLHwNbbt4", "y682ml90jGw"], "start_seconds": ["300", "11"], "properties": ["a, horn, run", "beeps, machine, continuously"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", null], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a beeping sound is being made "], "question": "which machine is beeping continuously?", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "a clock ticktocks"], "sample_ids": ["xjhAnI2q6hM", "v-g-j2uTByM"], "start_seconds": ["6", "30"], "properties": ["wind, blow, loudly", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "in your own words a cuckoo 
clock hanging on the wall"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["dogs barking and whimpering", "wind blows as people chatter quietly"], "sample_ids": ["tIY7qOV3rEM", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["barking, whimpering, dog", "wind, chatter, people"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage is blurry and out of focus"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["an emergency siren wails as it passes", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vGj1XLJvNrw", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["wails, wails, pass", "a woman, something, fried"], "captions_pred_video": ["footage of a police car driving down a city street", "- a woman cooking in the kitchen"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a person uses a saw to cut some wood"], "sample_ids": ["wTjoRj1se3U", "sHbXC6na9hg"], "start_seconds": ["390", "0"], "properties": ["engine, run, people", "a person, saw, wood"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["a jet engine is running and people are talking", "an engine is idling and vibrating"], "question": "which entity is a person", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "several insects fly while two men talk"], "sample_ids": ["vJvryTwuAV8", 
"s-T9OVOiMLo"], "start_seconds": ["16", "330"], "properties": ["audience, cheer, man", "several, fly, men"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more insects", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a stream of water flows as people talk and wind blows"], "sample_ids": ["zofjfKhqLk8", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["background, metal, clank", "stream, water, flow"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage is blurry and out of focus"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["xyL9F5VrjkE", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["wind, motor, distance", "female, spraying, scream"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["the revving of an engine throttle followed by a man speaking", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tezvROoo4bs", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["audio, 
throttle, speaking", "men, talk, cars"], "captions_pred_video": ["footage of a busy city street with cars parked on both sides of the road", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about cars?", "label": 1}, {"captions": ["food is frying and sizzles", "people cheer as a vehicle engine revs"], "sample_ids": ["zNRChLjqcU", "xjhAnI2q6hM"], "start_seconds": ["220", "6"], "properties": ["food is frying, sizzles, food", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["water is running from a faucet into a sink", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man is filing a hard object", "a stream runs then someone speaks"], "sample_ids": ["vveS8HT7Uog", "wbHTKEJZyhc"], "start_seconds": ["100", "20"], "properties": ["a man, hard, object", "stream, run, someone"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video 
footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a waterfall is flowing and people are speaking "], "question": "which entity is a stream", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a clock ticktocks"], "sample_ids": ["vZAw4apG0Es", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["background, tick, repeat", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a clock is ticking and people are talking", "a clock is ticking loudly"], "question": "which entity has a tick that repeats", "label": 0}, {"captions": ["the revving of an engine throttle followed by a man speaking", "a clock ticktocks"], "sample_ids": ["tezvROoo4bs", "v-g-j2uTByM"], "start_seconds": ["40", "30"], "properties": ["audio, throttle, speaking", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a busy city street with cars parked on both sides of the road", 
"in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["x9JovgqUcs", "yajyRTUQk3U"], "start_seconds": ["500", "400"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a person speaking and typing on a computer keyboard?", "label": 0}, {"captions": ["a telephone rings followed by a woman talking", "someone whistles briefly"], "sample_ids": ["tGcFnX0GHI", "uFoga8sHpiw"], "start_seconds": ["0", "90"], "properties": ["ring, talk, woman", "sound, duration, pitch"], "captions_pred_video": [null, "footage of a bird in a cage"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a person whistles a song"], "question": "which entity has a higher pitch", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "a man speaks followed by another man speaking outside"], "sample_ids": ["ukxt9I7eMMg", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["food, pan, cook", "two men, speak, follow"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["a goat screams and people 
speak in the background", "water pouring and bubbling"], "sample_ids": ["xC8kbrKJmco", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["background, goat, scream", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a goat is bleating ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a grown man speaks and water bubbles and runs"], "sample_ids": ["v7jJS8aAyA", "vSeGhaZt-aI"], "start_seconds": ["10", "50"], "properties": ["wind, blows, loudly", "water, bubbles, run"], "captions_pred_video": [null, "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a baby cries and a woman moans", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["smDKStoHBJo", "w34HjHr6gAY"], "start_seconds": ["0", "30"], "properties": ["a, cry, woman", "beeps, hit, woman"], "captions_pred_video": ["a man holding a crying baby in his arms", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 
1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "people speak as gunfire rings out"], "sample_ids": ["zofjfKhqLk8", "wqTCwqVRDlk"], "start_seconds": ["10", "80"], "properties": ["background, metal, clank", "gunfire, ring, speak"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a door opens and birds chirp", "a woman speaks happily and an animal chirps"], "sample_ids": ["yeFvk9x0wWI", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["door, open, birds", "a woman, chirps, animal"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", null], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "children cheer as a man speaks then an audience screams"], "sample_ids": ["zgUgkpk78xU", "vJvryTwuAV8"], "start_seconds": ["70", "16"], "properties": ["horn, bells, ring", "audience, cheer, man"], 
"captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking and a crowd is shouting and whooping "], "question": "which entity is a warning", "label": 0}, {"captions": ["a man speaking with light rustling", "vehicles pass by on a roadway"], "sample_ids": ["zOZleIRqZm4", "tgbONvsP47Y"], "start_seconds": ["80", "0"], "properties": ["light, rustling, man", "pass, vehicle, roadway"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a car is driving on the road "], "question": "which entity is more active", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "water runs into a sink while men speak"], "sample_ids": ["sAam2NqGhLY", "vzceMbklWc"], "start_seconds": ["20", "180"], "properties": ["snoring, breathing, child", "water, sink, run"], "captions_pred_video": ["of a little girl sleeping on a couch", null], "captions_pred_audio": ["a person is snoring", "water is running and a man is speaking"], "question": "which entity is a video", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "roadway noise occurs and a truck accelerates"], "sample_ids": ["uiS58TNyUiw", "tgbONvsP47Y"], "start_seconds": ["430", "0"], "properties": ["audio, man, speaking", "noise, truck, accelerate"], "captions_pred_video": ["of the pigeon in the cage", "footage of a fire truck entering a 
garage"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a car is driving on the road "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man woman speak while crickets sing", "soft movement is accompanied by clocks ticking in the background"], "sample_ids": ["zTLVJCo4WEE", "vlJS7LN2XyM"], "start_seconds": ["30", "30"], "properties": ["a, crickets, sing", "background, clocks, ticking"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a ticktock of a clock"], "question": "which entity has a clock ticking in the background?", "label": 1}, {"captions": ["a weapon fires multiple times", "wind blows as people chatter quietly"], "sample_ids": ["sMC07Ucy7kg", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["weapon, fire, multiple", "wind, chatter, people"], "captions_pred_video": ["footage is from a car's point of view", "footage is blurry and out of focus"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "people cheer as a vehicle engine revs"], "sample_ids": ["vmrxwuAMb2I", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["a dog, inhales, exhales", "engine revs, vehicle, people"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a dog barks and growls", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["someone snores nearby", "two men and a woman 
talk while wind blows and birds tweet"], "sample_ids": ["spJCm8tD9Zo", "wqZ135Ssz0"], "start_seconds": ["90", "60"], "properties": ["someone snores, nearby, someone", "two men, woman, birds"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "animals bleat and moo as a person speaks"], "sample_ids": ["xKB8O8LTs6s", "tPJvjq9QePY"], "start_seconds": ["70", "40"], "properties": ["music, gunfire, explosion", "animal, bleat, moo"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a dog and a sheep in a barn"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a baby cries and a man speaks"], "question": "which entity is more calm", "label": 1}, {"captions": ["some clanking with distant murmuring", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["uMTTDZ2mb4", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["clanking, murmuring, distant", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more animal like", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "a church bell rings several times"], "sample_ids": ["uPDn2BFTHk", "sUVVjE3Ucp8"], "start_seconds": ["140", "0"], "properties": ["woman, laughs, speaks", "ring, bell, several"], "captions_pred_video": [null, "the video shows a stone wall with a clock on top of it and a bench in front of it"], "captions_pred_audio": ["a baby laughs and a 
woman speaks", "a church bell is ringing "], "question": "which entity is silent", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "paper is crumpling consistently"], "sample_ids": ["spYNpeN7rPY", "v5cSxLaHADY"], "start_seconds": ["1", "0"], "properties": ["a clock, ticktock, man", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a piece of wood is being placed down and sawed"], "sample_ids": ["xBxDz0CFVn0", "uiItxDsDMFI"], "start_seconds": ["30", "30"], "properties": ["stream, water, flow", "wood, piece, saw"], "captions_pred_video": ["footage is blurry and out of focus", "a man cutting a log with an axe in the 
woods"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a saw is being used with background noise "], "question": "which entity is being cut", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["soTOh3zYJfY", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["vehicle, skid, tires", "a woman, something, fried"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["w2JXXIAdUdg", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["snoring, distance, person", "animal, grunts, snorts"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a woman is speaking and a baby is crying"], "question": "which entity is a person speaking indiscriminately in the distance with a person snoring loudly nearby?", "label": 0}, {"captions": ["a man rubs two objects together then speaks", "a telephone rings followed by a woman talking"], "sample_ids": ["vveS8HT7Uog", "tGcFnX0GHI"], "start_seconds": ["100", "0"], "properties": ["a man, objects, speak", "ring, talk, woman"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a dial tone sounds followed by a woman speaking"], 
"question": "which entity is a conversation", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["wRBHTgrbiwg", "tiDFTC-5vU"], "start_seconds": ["50", "30"], "properties": ["birds, chirp, cooing", "male, duck, laugh"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking and ducks are quacking"], "question": "which entity is a group of birds?", "label": 0}, {"captions": ["a loud engine muffles a man as he speaks", "people speak as gunfire rings out"], "sample_ids": ["xyx6eNVEYRY", "wqTCwqVRDlk"], "start_seconds": ["380", "80"], "properties": ["loud, engine, muffles", "gunfire, ring, speak"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["continuous snoring", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["sLkeqCDJIyw", "ukg5L09Wpvo"], "start_seconds": ["120", "150"], "properties": ["loud, snoring, noise", "clickety-clack, train, whistle"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a person is snoring loudly", "a train blows its whistle and blows its horn "], "question": "which noise is continuous", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a train horn blows as it passes by"], "sample_ids": ["yZmhM1HcsyE", "zVacuqSb4LI"], "start_seconds": ["4", "30"], "properties": ["engine, roar, water", "horn, blows, train"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which train is passing by", "label": 1}, {"captions": ["birds chirp and wind blows", "wind blowing followed by a zoom"], "sample_ids": ["sxIvBMSavMQ", "vr8ZXjEBhMQ"], "start_seconds": ["210", "150"], "properties": ["birds, chirp, wind", "wind, blow, zoom"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from 
a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to be a video of a wind blowing?", "label": 1}, {"captions": ["a person screams glaringly", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["xC8kbrKJmco", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["glaringly, screams, person", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a goat is bleating ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sQGXqGcwOTc", "yajyRTUQk3U"], "start_seconds": ["3", "400"], "properties": ["cling, speak, dishes", "a woman, something, fried"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "- a woman cooking in the kitchen"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "three men talk while wind blows and some 
liquid flows"], "sample_ids": ["wqADXCzngMw", "vJ7JPEFhyLA"], "start_seconds": ["340", "16"], "properties": ["engine, idle, man", "three men, wind, flow"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man talking?", "label": 0}, {"captions": ["a woman and man are speaking", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vbpKkWvfOu4", "vfYTJq7nU"], "start_seconds": ["560", "130"], "properties": ["two people, speaking, woman, man", "rustling, ducks, quack"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a duck quacks and a woman speaks"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["w6RTHR6AeAg", "vlS6YMeWAPo"], "start_seconds": ["40", "40"], "properties": ["call, owl, screech", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a goat bleats and birds chirp"], "question": "which entity is a bird?", "label": 0}, {"captions": ["a helicopter engine runs", "a horn rings out as a machine runs by"], "sample_ids": ["t5ZbXbniOWk", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["engine, helicopter, run", 
"a, horn, run"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a helicopter is flying overhead ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["vddP56-ogds", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["water, flow, laugh", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["people speak softly as food sizzles", "birds chirp and objects are moved around"], "sample_ids": ["yhQ2Lg-7qDY", "yPUYU6t3rwo"], "start_seconds": ["130", "370"], "properties": ["food, sizzle, speak", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a faucet is running and a man is speaking", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "a man speaks as a boat engine runs"], "sample_ids": ["yDoT73BWsdA", "wtDqrBygTcU"], "start_seconds": ["10", "30"], "properties": ["engine, revs, vehicle", "man, engine, run"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "shows a person riding on the back of a boat as it speeds through the water"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a motor is running"], "question": "which entity is about a 
vehicle engine?", "label": 0}, {"captions": ["a woman speaks and dog vocalizes", "a man speaks as a motor runs in the background"], "sample_ids": ["uWAAAL4CIoc", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["a, dog, vocalize", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "a propeller rotates loudly and intensely"], "sample_ids": ["yZp6xizR0yU", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["animal, bleat, cry", "loud, intense, propeller"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a man speaks as a car is passing by"], "sample_ids": ["uEU-Hg5MTN8", "sK4u5T8hW78"], "start_seconds": ["27", "30"], "properties": ["animal, grunts, snorts", "a, car, pass"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with background noise and breathing sounds "], "question": 
"which entity is about a car passing by?", "label": 1}, {"captions": ["a woman talking as an infant is crying", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["tMbMDvT50j8", "vbZ-0lGPneg"], "start_seconds": ["12", "30"], "properties": ["a, talk, infant", "a woman, a television program, a bird"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a baby cries and a woman speaks", "a woman is speaking and a dog is whimpering"], "question": "which woman is talking", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "ticking continues without interruption"], "sample_ids": ["w2M4i1mklOA", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["alarm, gears, turn", "ticking, continuous, clock"], "captions_pred_video": ["footage of an antique clock", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a clock is ticking loudly"], "question": "which clock is ticking continuously", "label": 1}, {"captions": ["a beep occurs briefly", "an engine runs loudly"], "sample_ids": ["xtWeJ56-U-g", "vqZuVbG6-HI"], "start_seconds": ["20", "130"], "properties": ["beep, occur, briefly", "loud, engine, run"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "footage is blurry because it's raining outside"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "roadway noise occurs and a 
truck accelerates"], "sample_ids": ["y4tPJXBKDig", "tgbONvsP47Y"], "start_seconds": ["20", "0"], "properties": ["a, noise, talk", "noise, truck, accelerate"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a car is driving on the road "], "question": "which noise is made by a truck", "label": 1}, {"captions": ["a helicopter engine runs", "a toilet flushes and a female speaks"], "sample_ids": ["t5ZbXbniOWk", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["engine, helicopter, run", "female, flushes, toilet"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "footage is blurry and out of focus"], "captions_pred_audio": ["a helicopter is flying overhead ", "a toilet flushes and a man speaks"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "pigeons vocalize and birds chirp"], "sample_ids": ["vlS6YMeWAPo", "uiS58TNyUiw"], "start_seconds": ["40", "430"], "properties": ["sheep, baa, birds", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "of the pigeon in the cage"], "captions_pred_audio": ["a goat bleats and birds chirp", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["food is frying then a woman speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["ukxt9I7eMMg", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["food, woman, speak", "people, applaud, hoot"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "people are clapping and speaking with 
background noise "], "question": "which entity is a performance", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a clock ticktocks"], "sample_ids": ["tDlysoZiA1I", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["animal, grunt, chirp", "ticktocks, clock, ticktocks"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a woman speaks happily and an animal chirps"], "sample_ids": ["sU53zg9Jp7s", "uWAAAL4CIoc"], "start_seconds": ["380", "0"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "a woman, chirps, animal"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", null], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a woman is speaking and a dog is barking "], "question": "which entity is more likely to be a crow?", "label": 0}, {"captions": ["material crumbles into a microphone", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vofpvUo6NAw", "zj2R0XoFr5k"], "start_seconds": ["220", "50"], "properties": ["material, crumbles, microphone", "airplane, boy, fly"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "vehicles pass by on a roadway"], "sample_ids": ["xyL9F5VrjkE", "tgbONvsP47Y"], 
"start_seconds": ["20", "0"], "properties": ["wind, blows, vehicle", "pass, vehicle, roadway"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a fire truck entering a garage"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a car is driving on the road "], "question": "which vehicle is moving", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sEprKHm8Sj8", "xKB8O8LTs6s"], "start_seconds": ["90", "70"], "properties": ["car, tires, slows", "music, gunfire, explosion"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sOa7g-44Dag", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["audio, scratching, man", "three men, wind, flow"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["several ducks quack and cocks crow far away", "a man speaks as a car is passing by"], "sample_ids": ["sNB8zxXneIM", "sK4u5T8hW78"], 
"start_seconds": ["20", "30"], "properties": ["several, quack, cocks", "a, car, pass"], "captions_pred_video": ["a group of geese in a cage", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a clock ticktocks continuously", "a car speeding up in the distance"], "sample_ids": ["vlJS7LN2XyM", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["ticktocks, clock, ticktocks continuously", "distance, car, speed"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["t97k0cejSQE", "tiDFTC-5vU"], "start_seconds": ["250", "30"], "properties": ["bird, chirp, insect", "male, duck, laugh"], "captions_pred_video": ["a bee on a purple thistle flower", null], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a man is speaking and ducks are quacking"], "question": "which entity is a group of animals?", "label": 0}, {"captions": ["water bubbles and gurgles.", "an airplane flies overhead as a woman speaks"], "sample_ids": ["tB7hWb9gTuQ", "zj2R0XoFr5k"], "start_seconds": ["30", 
"50"], "properties": ["bubbles, gurgles, water", "airplane, fly, overhead"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["water is splashing and gurgling", "a woman speaks while a helicopter flies overhead "], "question": "which entity is moving", "label": 1}, {"captions": ["birds chirp then an animal grunts", "someone is typing on a computer keyboard"], "sample_ids": ["tDlysoZiA1I", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["animal, grunt, chirp", "keyboard, type, computer"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "how to make money on youtube in spanish"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a person is typing on a keyboard"], "question": "which is not a type of animal", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sWZzXuWYY", "su6FAOcOA8c"], "start_seconds": ["420", "4"], "properties": ["male, clanks, thumps", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a vehicle accelerates and squeals tires"], "sample_ids": ["sfAvvZwdLCY", "yRx9txMcBl0"], "start_seconds": ["20", "40"], "properties": ["water drains, flushes, water", "accelerates, tires, squeals"], "captions_pred_video": ["footage of the toilet in the bathroom", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 
mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a toilet is flushed", "a car is revving its engine and skidding "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "people applaud and hoot and chat quietly"], "sample_ids": ["wy1eKjR7KC0", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["people, talk, distance", "people, applaud, hoot"], "captions_pred_video": ["two police officers riding motorcycles down the street", null], "captions_pred_audio": ["a man is speaking and a siren is going off", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a male speaks over some small clicks", "water is sprayed across a hard surface"], "sample_ids": ["uXxVebHsGZ8", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["male, clicks, speak", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a train engine runs and a horn blows", "people applaud and hoot and chat quietly"], "sample_ids": ["zPX9o1uDiI", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["engine, horn, run", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "people are clapping and speaking with background noise "], 
"question": "which entity is more quiet", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "a car revs and accelerates loudly and men and women chatter among themselves"], "sample_ids": ["uiS58TNyUiw", "y8dSeubCNI"], "start_seconds": ["430", "4"], "properties": ["vocalize, bird, chirp", "men, women, car"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "an engine revving and people talking in the background"], "question": "which entity is a human activity", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a clock ticktocks"], "sample_ids": ["weDbePuc-Xc", "v-g-j2uTByM"], "start_seconds": ["40", "30"], "properties": ["music, slaps, human", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a train horn blows as it passes by"], "sample_ids": ["sfAvvZwdLCY", "zVacuqSb4LI"], "start_seconds": ["20", "30"], "properties": ["flushes, drains, water", "horn, blows, train"], "captions_pred_video": ["footage of the toilet in the bathroom", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a toilet is flushed", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "water splashes as an animal walks through"], "sample_ids": ["vzceMbklWc", "w1ir-sZ3Im8"], "start_seconds": ["180", "90"], "properties": ["water, faucet, sink", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["water is running and a man is speaking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sxYkFKFIZD0", "zFjIWfSD-4"], "start_seconds": ["20", "410"], "properties": ["screech, man, door", "People, motor, brakes"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "an infant crying as a woman laughs"], "sample_ids": 
["xOZfdgAgJ9o", "xhmRY9yhC7c"], "start_seconds": ["40", "20"], "properties": ["woman, whimpering, speaking", "a, laugh, infant"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a baby cries and a woman speaks"], "question": "which woman is speaking?", "label": 0}, {"captions": ["multiple birds vocalize and wind blows", "a person snores loudly multiple times at a close distance"], "sample_ids": ["uoGVs9yUqY4", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["multiple, vocalize, wind", "loud, multiple, distance"], "captions_pred_video": ["for how to make a wooden shed door youtube", null], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a drill drills through something then people begin laughing"], "sample_ids": ["tQWGZLItBXk", "tEE3MpBt1sg"], "start_seconds": ["170", "50"], "properties": ["music, kid, speak", "drill, something, laugh"], "captions_pred_video": ["worms revolution screenshots", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vBHyYJ8pL0", "ukg5L09Wpvo"], "start_seconds": ["2", "150"], "properties": ["noise, door, opening", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["mechanisms are 
ticking and a sliding door is opening and closing ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a clock ticktocks in wind", "an airplane engine spools and people speak"], "sample_ids": ["yVumC9TGknc", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["ticktocks, clock, wind", "airplane, engine, spool"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a series of beeps and chirps", "a jet engine is running and people are talking"], "question": "which entity is moving", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "water is sprayed across a hard surface"], "sample_ids": ["wDVMhEdTiVw", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["gun, shoot, water", "water, spray, surface"], "captions_pred_video": ["a blurry image of trees and water in the forest", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "spraying followed by silence"], "question": "which entity is a spray of water?", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["u7C-AEBQM", "t97k0cejSQE"], "start_seconds": ["30", "250"], "properties": ["ticks, rhythmic, quiet", "sound, chirp, buzz"], "captions_pred_video": [null, "a bee on a purple thistle flower"], "captions_pred_audio": ["a ticktock of a clock", "a bee buzzes and a woman speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a telephone rings and a bird vocalizes", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["skd2PphS6oI", "uYT5gxnyMWM"], "start_seconds": ["190", "50"], "properties": ["ring, bird, vocalize", "female, 
spraying, scream"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a small engine idles continuously", "a car speeding up in the distance"], "sample_ids": ["y5WII6cTH7k", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["engine, idle, continuously", "distance, car, speed"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", null], "captions_pred_audio": ["an engine is knocking and vibrating ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["an adult male speaks and dials a rotary phone", "a car speeding up in the distance"], "sample_ids": ["tK4VlLsNxak", "u0TrcHhkPQ"], "start_seconds": ["120", "20"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "distance, car, speed"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "wind blows as people chatter quietly"], "sample_ids": ["vms5XGTDVQc", "xBxDz0CFVn0"], "start_seconds": ["220", "30"], "properties": ["paper, crumpled, crinkled", "wind, chatter, people"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "footage is blurry and out of focus"], "captions_pred_audio": ["paper is crumpled and crinkled", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["children cheer as a man speaks then an audience 
screams", "propeller rearing loudly with some male and female voices interspersed in the background"], "sample_ids": ["vJvryTwuAV8", "vqZuVbG6-HI"], "start_seconds": ["16", "130"], "properties": ["audience, cheer, man", "background, male, female"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a lawn mower is running and men are speaking "], "question": "which entity has a man speaking to an audience?", "label": 0}, {"captions": ["someone is snoring while sleeping", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["ujMt0-D-x2k", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["snore, sleep, someone", "engine, laugh, loud"], "captions_pred_video": ["of the dog playing with a toy on the floor", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a person is snoring loudly", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sWZzXuWYY", "vYkA3cfXp5Q"], "start_seconds": ["420", "30"], "properties": ["male, clanks, thumps", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "winds blows roughly as a vehicle races past"], "sample_ids": ["yRx9txMcBl0", "xjvTpk2Zpr8"], "start_seconds": ["40", "70"], "properties": ["accelerates, tires, squeals", "wind, blows, vehicle"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft 
auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a jet engine roars and wind blows "], "question": "which vehicle is moving faster", "label": 1}, {"captions": ["a person snoring", "someone snores nearby"], "sample_ids": ["t8tv5YRMJUg", "spJCm8tD9Zo"], "start_seconds": ["0", "90"], "properties": ["a person, snore, loud", "someone snores, nearby, someone"], "captions_pred_video": ["of a man getting his face licked by another man", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a person is snoring loudly"], "question": "which entity is a person?", "label": 0}, {"captions": ["motors runs briefly and tires screech", "a vehicle is skidding and squealing tires"], "sample_ids": ["yRx9txMcBl0", "soTOh3zYJfY"], "start_seconds": ["40", "40"], "properties": ["motors, tires, screech", "vehicle, skid, tires"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods 
cars, gta", "a red car drifting on a winding road with smoke coming out of it"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wztCSUxOf8", "yajyRTUQk3U"], "start_seconds": ["130", "400"], "properties": ["a crowd, yells, applauds", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a woman is speaking while food is frying in the background"], "question": "which entity is a demonstration", "label": 1}, {"captions": ["scraping and female speech with distant music", "people speak as gunfire rings out"], "sample_ids": ["yHeVV-xeOxQ", "wqTCwqVRDlk"], "start_seconds": ["130", "80"], "properties": ["female, speech, music", "gunfire, ring, speak"], "captions_pred_video": ["of a girl milking a goat's udder", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vbZ-0lGPneg", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["a woman, a television program, a bird", "engine, revs, vehicle"], "captions_pred_video": ["of a man holding a baby duck in his hands", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman speaks over sizzling noise", 
"water pouring and bubbling"], "sample_ids": ["yajyRTUQk3U", "uyRfq-jKPpo"], "start_seconds": ["400", "50"], "properties": ["noise, woman, speak", "water, bubbles, pouring"], "captions_pred_video": ["- a woman cooking in the kitchen", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zO-LSSY92ZM", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["liquid, surface, sound", "music, gunfire, explosion"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how 
to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["steam is hissing and hissing", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["people speak softly as food sizzles", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yhQ2Lg-7qDY", "uYT5gxnyMWM"], "start_seconds": ["130", "50"], "properties": ["food, sizzle, speak", "female, spraying, scream"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a woman is speaking and a baby is crying"], "question": "which entity is more violent", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a clock ticktocks"], "sample_ids": ["zY3icUyMdh8", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wRV8yMk886E", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["liquid, spray, nozzle", "a woman, something, fried"], 
"captions_pred_video": ["two cars are parked in a parking lot at night", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking", "label": 1}, {"captions": ["birds tweet and squawk", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["w1mlz3Pe4fU", "x9JovgqUcs"], "start_seconds": ["300", "500"], "properties": ["squawk, tweet, scream", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": ["of a bird in a cage", null], "captions_pred_audio": ["birds are chirping and singing", "a man speaks and types on a keyboard"], "question": "which entity is speaking", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["tDVADusiIoc", "uYT5gxnyMWM"], "start_seconds": ["60", "50"], "properties": ["wind, radio, waves", "a, scream, girl"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking and a baby is crying"], "question": "which entity has a man speaking over a radio?", "label": 0}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "water is sprayed across a hard surface"], "sample_ids": ["y8dSeubCNI", "sQwlkXjQabo"], "start_seconds": ["4", "10"], "properties": ["men, women, car", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["an engine revving and people talking in the background", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a child yells and 
another yells", "a machine beeps continuously"], "sample_ids": ["vMDHu7Lxcgw", "y682ml90jGw"], "start_seconds": ["410", "11"], "properties": ["two, yell, child", "beeps, machine, continuously"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", null], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a machine beeps continuously", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["y682ml90jGw", "ukg5L09Wpvo"], "start_seconds": ["11", "150"], "properties": ["beeps, machine, continuously", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a beeping sound is being made ", "a train blows its whistle and blows its horn "], "question": "which entity is continuous", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["w6RTHR6AeAg", "x9JovgqUcs"], "start_seconds": ["40", "500"], "properties": ["call, owl, screech", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": [null, null], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a man speaks and types on a keyboard"], "question": "which entity is speaking", "label": 1}, {"captions": ["a door opens and closes", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vBHyYJ8pL0", "sLUnaPT5gM8"], "start_seconds": ["2", "0"], "properties": ["open, close, door", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a baby is laughing and breathing while a man is speaking "], 
"question": "which entity is more quiet", "label": 0}, {"captions": ["multiple insects buzz over rustling wind", "water rushes and then a vehicle zooms past"], "sample_ids": ["tMJne1a4AFI", "s4Uz1Ffgo04"], "start_seconds": ["0", "100"], "properties": ["wind, buzz, rustling", "water, rushes, vehicle"], "captions_pred_video": ["a swarm of bees on the ground", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is moving faster", "label": 1}, {"captions": ["water flows and trickles", "a woman speaks and then a man speaks"], "sample_ids": ["tB7hWb9gTuQ", "vbpKkWvfOu4"], "start_seconds": ["30", "560"], "properties": ["water, flow, trickle", "a, man, speaks"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["water is splashing and gurgling", "a woman is speaking and a man is speaking"], "question": "which entity is a human speaking?", "label": 1}, {"captions": ["food is frying and sizzles", "a man speaks while turning a water faucet on"], "sample_ids": ["zNRChLjqcU", "vf9xf3vMsGM"], "start_seconds": ["220", "540"], "properties": ["food is frying, sizzles, food", "A man speaks while turning a water faucet on."], "captions_pred_video": [null, "of the person washing their hands under the faucet"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking while water is running in the background"], "question": "which entity is a man speaking while turning a water faucet on?", "label": 1}, {"captions": ["females talk and laugh over gusting wind", 
"popping and crackling repeats as men yell and laugh"], "sample_ids": ["un9VQlzgZM", "rqu8iB22IY"], "start_seconds": ["5", "5"], "properties": ["females, talk, laugh", "sound, repeats, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a dog barks and a man speaks while music plays "], "question": "which entity is more like a joke", "label": 1}, {"captions": ["people clap and speak in the distance", "a child speaks in closed space"], "sample_ids": ["wwyfGO2J4", "yW6FWLSLkx4"], "start_seconds": ["90", "40"], "properties": ["clap, distance, speak", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "water splashes as an animal walks through"], "sample_ids": ["vddP56-ogds", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["water, splash, person, laugh", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "water splashes and gurgles as people speak"], "question": "which entity is about splashing water?", "label": 0}, {"captions": ["a man talks as something metal hits against and glass is set down", "a woman speaks happily and an animal chirps"], "sample_ids": ["x6ijhqRY38s", "uWAAAL4CIoc"], "start_seconds": ["250", "0"], "properties": ["something metal, glass, hit", "a woman, chirps, animal"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man 
is speaking and dishes are clanging ", "a woman is speaking and a dog is barking "], "question": "which entity is more likely to be a recording", "label": 1}, {"captions": ["a person speaks over rustling leaves", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zOZleIRqZm4", "tiDFTC-5vU"], "start_seconds": ["80", "30"], "properties": ["rustling, leaves, person", "male, duck, laugh"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking over rustling leaves?", "label": 0}, {"captions": ["birds chirp and an insect buzzes around", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["t97k0cejSQE", "zj2R0XoFr5k"], "start_seconds": ["250", "50"], "properties": ["bird, chirp, insect", "airplane, boy, fly"], "captions_pred_video": ["a bee on a purple thistle flower", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wEBlkGWVWwE", "uEU-Hg5MTN8"], "start_seconds": ["260", "27"], "properties": ["a, babble, woman", "a woman, laughs, animal"], "captions_pred_video": ["shows a person writing on the whiteboard", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a horn rings 
out as a machine runs by"], "sample_ids": ["uRExseg-0XI", "slZLHwNbbt4"], "start_seconds": ["210", "300"], "properties": ["woman, man, water", "a, horn, run"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity has a horn?", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "an insect buzzes around continuously"], "sample_ids": ["xjhAnI2q6hM", "v25l1jef3JY"], "start_seconds": ["6", "0"], "properties": ["wind, blow, loudly", "buzzes, continuously, insect"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a fly is buzzing around a microphone "], "question": "which entity is a natural phenomenon", "label": 0}, {"captions": ["a woman speaks and dog vocalizes", "a duck quacks loudly and continuously"], "sample_ids": ["uWAAAL4CIoc", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["a, dog, vocalize", "loud, continuous, quacks"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a duck is quacking loudly"], "question": "which animal is speaking", "label": 1}, {"captions": ["an airplane accelerates briefly", "a duck quacks loudly and continuously"], "sample_ids": ["zjTG0gaGCUI", "vh30P49Po6s"], "start_seconds": ["80", "30"], "properties": ["accelerates, airplane, briefly", "loud, continuous, quacks"], "captions_pred_video": [null, "of a man brushing his teeth with a 
toothbrush in his mouth"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "birds chirp and objects are moved around"], "sample_ids": ["y8WEcpOlT3I", "yPUYU6t3rwo"], "start_seconds": ["40", "370"], "properties": ["wind, speak, buffeting", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a person sniffs and sneezes", "a infant makes noise and is excited"], "sample_ids": ["uRlbY6aoBU", "wIJK3-5y0kA"], "start_seconds": ["0", "30"], "properties": ["sneezes, person, sniffs", "noise, excited, infant"], "captions_pred_video": [null, "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is sneezing ", "a baby cries and a woman speaks"], "question": "which entity is more likely to be a baby", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wjsXBsc7M40", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "three men, wind, flow"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 0}, {"captions": ["an engine idles quietly then gradually becomes louder", "a man speaks as birds chirp and a vehicle passes nearby"], "sample_ids": ["vbr9mHKc8WM", "siJFXfGWgDk"], 
"start_seconds": ["40", "50"], "properties": ["noise, loudness, engine", "a, bird, vehicle"], "captions_pred_video": [null, "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["an engine is idling", "a man is speaking and birds are chirping in the background "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["su6FAOcOA8c", "y8WEcpOlT3I"], "start_seconds": ["4", "40"], "properties": ["engine, idle, woman", "harsh, wind, blows"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking with wind noise in the background "], "question": "which entity is a man speaking to another man?", "label": 1}, {"captions": ["people speak then an engine runs", "some men converse over an engine running"], "sample_ids": ["uMTTDZ2mb4", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["engine, run, people", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows people speaking while an engine runs?", "label": 0}, {"captions": ["a man speaks, then dials a rotary telephone", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["tK4VlLsNxak", "y8WEcpOlT3I"], "start_seconds": ["120", "40"], "properties": ["a, dial, telephone", "harsh, wind, blows"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man is speaking with wind noise in the 
background "], "question": "which entity is about a man speaking and dialing a rotary telephone?", "label": 0}, {"captions": ["a person is burping while a girl speaks", "a horn blasts as warning bells ring"], "sample_ids": ["vdoxuJn9lTc", "zgUgkpk78xU"], "start_seconds": ["40", "70"], "properties": ["person, burp, girl", "horn, bells, ring"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a child speaks followed by a burp", "a train blows its horn as it speeds down the tracks "], "question": "which entity is louder", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "a car accelerates and wind blows"], "sample_ids": ["ukg5L09Wpvo", "u0TrcHhkPQ"], "start_seconds": ["150", "20"], "properties": ["clickety-clack, train, whistle", "accelerates, wind, blows"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", null], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["zcDwZ6W7E3E", "uYT5gxnyMWM"], "start_seconds": ["180", "50"], "properties": ["man, speak, motorcycles", "a, scream, girl"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking while a car 
accelerates and revs its engine ", "a woman is speaking and a baby is crying"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["small dogs yip and bark sharply", "a duck quacks continuously"], "sample_ids": ["v-wcQf4BDY0", "vh30P49Po6s"], "start_seconds": ["120", "30"], "properties": ["bark, yip, sharply", "quacks, continuously, duck"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a dog barks and growls", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 1}, {"captions": ["water runs into a sink while men speak", "continuous chugging with birds chirping in the background"], "sample_ids": ["vzceMbklWc", "xM4joTqDVp4"], "start_seconds": ["180", "160"], "properties": ["water, sink, run", "background, chirp, birds"], "captions_pred_video": [null, "footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack"], "captions_pred_audio": ["water is running and a man is speaking", "birds are chirping and a train is moving "], "question": "which entity has birds chirping in the background?", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "a man speaks as a car is passing by"], "sample_ids": ["wIvYjuR3nrg", "sK4u5T8hW78"], "start_seconds": ["9", "30"], "properties": ["birds, pigeons, vocalize", "a, car, pass"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai"], "captions_pred_audio": ["birds are chirping and cooing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a moving object", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["yI-KvObbDoY", "ukg5L09Wpvo"], "start_seconds": ["260", "150"], "properties": ["sound, smack, wind", "clickety-clack, train, whistle"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vbr9mHKc8WM", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["noise, loudness, engine", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["an engine is idling", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["an electronic device bleeps once", "an infant crying frantically"], 
"sample_ids": ["tHJ6JSa8Y4", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["bleeps, electronic, device", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a clock is ticking and beeping", "a baby cries loudly"], "question": "which entity is a human", "label": 1}, {"captions": ["an audience gives applause as a man yells and a group sings", "water flows and trickles"], "sample_ids": ["tdWhHV3X25Q", "tB7hWb9gTuQ"], "start_seconds": ["60", "30"], "properties": ["applause, audience, yells", "water, flow, trickle"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a person snoring", "a duck quacks continuously"], "sample_ids": ["t8tv5YRMJUg", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["a person, snore, loud", "quacks, continuously, duck"], "captions_pred_video": ["of a man getting his face licked by another man", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a duck is quacking loudly"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "an insect buzzes around continuously"], "sample_ids": ["vuUVPzd2FXw", "v25l1jef3JY"], "start_seconds": ["160", "0"], "properties": ["a, steam, release", "buzzes, continuously, insect"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a fly is buzzing around a microphone "], "question": "which 
entity is a living thing", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "people cheer as a vehicle engine revs"], "sample_ids": ["sapQIQUhFc", "xjhAnI2q6hM"], "start_seconds": ["280", "6"], "properties": ["water, stream, trickles", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wy1eKjR7KC0", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["people, talk, distance", "multiple, people, yell"], "captions_pred_video": ["two police officers riding motorcycles down the street", null], "captions_pred_audio": ["a man is speaking and a siren is going off", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "people speak as gunfire rings out"], "sample_ids": ["yDoT73BWsdA", "wqTCwqVRDlk"], "start_seconds": ["10", "80"], "properties": ["engine, revs, vehicle", "gunfire, ring, speak"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["an engine runs and a man speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["yT5WfYMRr-U", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["engine, run, man", "airplane, boy, fly"], 
"captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a plane flying by?", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a clock ticktocks"], "sample_ids": ["uPDn2BFTHk", "v-g-j2uTByM"], "start_seconds": ["140", "30"], "properties": ["lady, laugh, baby", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an engine runs and a man speaks", "wind blowing followed by a zoom"], "sample_ids": ["yT5WfYMRr-U", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["engine, run, man", "wind, blow, zoom"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "an airplane engine runs"], "sample_ids": ["tjmoSi330GM", "yVPZ2MNWpms"], "start_seconds": ["23", "0"], "properties": ["speed, water, boat", "engine, airplane, runs"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a car is driving by on the road "], "question": "which is not a vehicle", "label": 1}, {"captions": ["a woman and man are speaking", "a child speaks in closed space"], "sample_ids": 
["vbpKkWvfOu4", "yW6FWLSLkx4"], "start_seconds": ["560", "40"], "properties": ["two people, speaking, woman, man", "child, space, speak"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a single person", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "a telephone rings followed by a woman talking"], "sample_ids": ["wAAkbZToh8", "tGcFnX0GHI"], "start_seconds": ["0", "0"], "properties": ["burp, laugh, speak", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man burps and a woman speaks", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["continuous sneezing together with speech", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["x4dZyf9Gbj0", "vbZ-0lGPneg"], "start_seconds": ["130", "30"], "properties": ["continuous, sneeze, speech", "a woman, a television program, a bird"], "captions_pred_video": ["footage is blurry and out of focus", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman sneezes and speaks", "a woman is speaking and a dog is whimpering"], "question": "which entity is a person", "label": 1}, {"captions": ["a jet engine spools up and takes off", "an airplane engine spools and people speak"], "sample_ids": ["vBslzh7saPw", "wTjoRj1se3U"], "start_seconds": ["90", "390"], "properties": ["engine, spools, takes", "airplane, engine, spool"], "captions_pred_video": ["a pickup truck 
carrying a large object down the road", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a jet engine is running and people are talking"], "question": "which entity is a video of an airplane engine spooling?", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sQwlkXjQabo", "xKB8O8LTs6s"], "start_seconds": ["10", "70"], "properties": ["liquid, surface, spray", "music, gunfire, explosion"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["spraying followed by silence", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["water pours followed by a woman speaking and then a man speaking", "small dogs yip and bark sharply"], "sample_ids": ["uRExseg-0XI", "v-wcQf4BDY0"], "start_seconds": ["210", "120"], "properties": ["audio, woman, man", "bark, yip, sharply"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a stream of water runs briefly"], "sample_ids": ["xZepNM9qcRA", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["background, motor, run", "stream, water, run"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man speaks while a motorcycle revs and 
accelerates ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a dark barks and whimpers", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sYj4hpDUZDQ", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["barks, whimpers, dark", "loud, multiple, distance"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", null], "captions_pred_audio": ["a dog barks and a cat meows", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["w34HjHr6gAY", "t25U-v4k4ts"], "start_seconds": ["30", "40"], "properties": ["beeps, hit, woman", "a, chirps, bird"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking and bees are buzzing"], "question": "which entity has a bird chirp?", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "water is sprayed across a hard surface"], "sample_ids": ["xOZfdgAgJ9o", "sQwlkXjQabo"], "start_seconds": ["40", "10"], "properties": ["woman, whimpering, speaking", "water, spray, surface"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "a close-up of a red car with 
water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "spraying followed by silence"], "question": "which entity is a video of a woman speaking and another woman whimpering?", "label": 0}, {"captions": ["a train horn sounds as a railroad passing bell rings", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zgUgkpk78xU", "tdWhHV3X25Q"], "start_seconds": ["70", "60"], "properties": ["horn, bell, train", "applause, audience, yells"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking and a crowd is clapping"], "question": "which entity is a human activity", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wqZ135Ssz0", "ukg5L09Wpvo"], "start_seconds": ["60", "150"], "properties": ["man, woman, squawks", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a train", "label": 1}, {"captions": ["a child speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["yW6FWLSLkx4", "zj2R0XoFr5k"], "start_seconds": ["40", "50"], "properties": ["a, child, speaks", "airplane, boy, fly"], 
"captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a child speaking?", "label": 0}, {"captions": ["someone is burping continuously", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["y636gklDioE", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["burps, burps, burps", "a woman, laughs, animal"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person burps loudly several times", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a prank", "label": 1}, {"captions": ["a weapon fires multiple times", "a woman speaks as she rubs two objects together"], "sample_ids": ["sMC07Ucy7kg", "vzxHnu-SFEw"], "start_seconds": ["10", "80"], "properties": ["weapon, fire, multiple", "two objects, woman, speak"], "captions_pred_video": ["footage is from a car's point of view", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper 
cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is not a weapon", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "a motor vehicle roars, drowning out people speaking in the background"], "sample_ids": ["sZvwOuuPGP0", "s4Uz1Ffgo04"], "start_seconds": ["50", "100"], "properties": ["engine, diesel, truck", "roars, background, people speaking"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a medium engine is running ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["someone snores nearby", "a car speeding up in the distance"], "sample_ids": ["spJCm8tD9Zo", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["someone snores, nearby, someone", "distance, car, speed"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 1}, {"captions": ["a toilet flushes and water drains", "an infant crying frantically"], "sample_ids": ["sfAvvZwdLCY", "zwOBqeFTgiU"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "cry, infant, frantically"], "captions_pred_video": ["footage of the toilet in the bathroom", "of the baby crying in the car seat"], "captions_pred_audio": ["a toilet is flushed", "a baby cries loudly"], "question": "which entity is a human", "label": 1}, {"captions": ["a dog barks and whimpers", "soft 
movement is accompanied by clocks ticking in the background"], "sample_ids": ["sShpyu2l4YQ", "vlJS7LN2XyM"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "background, clocks, ticking"], "captions_pred_video": ["the puppies are playing with a toy", "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["a dog is barking and growling", "a ticktock of a clock"], "question": "which entity is more calm", "label": 1}, {"captions": ["frogs croak and vocalize", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yswmmRZFItk", "tDVADusiIoc"], "start_seconds": ["0", "60"], "properties": ["croak, vocalize, frog", "water, radio, man"], "captions_pred_video": ["a close up of a frog in the water", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a frog is croaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a frog?", "label": 1}, {"captions": ["children cry and people talk", "a car accelerates and wind blows"], "sample_ids": ["xLwHe825Zs", "u0TrcHhkPQ"], "start_seconds": ["18", "20"], "properties": ["people talk, children cry, people talk", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby cries and a woman speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "the rumbling of a bus followed by a soft male voice"], "sample_ids": ["se87d6yxEOA", "vK93VuO0yNc"], "start_seconds": ["10", "30"], "properties": ["run, whistle, pass", "male voice, bus, rumble"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "footage is blurry due to the movement of the bus as it drives through the 
city at night"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a car drives by with wind noise in the background "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "people cheer as a vehicle engine revs"], "sample_ids": ["tZGN5a7ybxo", "xjhAnI2q6hM"], "start_seconds": ["60", "6"], "properties": ["ring, train, horn", "engine revs, vehicle, people"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["people speak and tapping occurs", "people cheer as a vehicle engine revs"], "sample_ids": ["tFCUUGdREgA", "xjhAnI2q6hM"], "start_seconds": ["70", "6"], "properties": ["people, tap, speak", "engine revs, vehicle, people"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["some men converse over an engine running", "a woman and man are speaking"], "sample_ids": ["sCiy7QS1U", "vbpKkWvfOu4"], "start_seconds": ["300", "560"], "properties": ["men, converse, engine", "two people, speaking, woman, man"], "captions_pred_video": [null, "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a woman 
is speaking and a man is speaking"], "question": "which entity shows two people speaking?", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["yNtRmrn0io8", "uZesmtKZGSw"], "start_seconds": ["210", "250"], "properties": ["storm, distance, strike", "men, talk, cars"], "captions_pred_video": ["footage of a house in the middle of the night", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["rain falls and thunder roars", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more likely to be a violent event", "label": 0}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vlJS7LN2XyM", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["background, clocks, ticking", "water, radio, man"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a music is played followed by a frog 
croaking and then music is played again", "people applaud and hoot and chat quietly"], "sample_ids": ["voJh2gJxXhA", "wwyfGO2J4"], "start_seconds": ["50", "90"], "properties": ["music, frog, croak", "people, applaud, hoot"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", null], "captions_pred_audio": ["music is playing and crickets are chirping ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an airplane engine spools and people speak", "some men converse over an engine running"], "sample_ids": ["wTjoRj1se3U", "sCiy7QS1U"], "start_seconds": ["390", "300"], "properties": ["airplane, engine, spool", "men, converse, engine"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", null], "captions_pred_audio": ["a jet engine is running and people are talking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows a man conversing over an engine running?", "label": 1}, {"captions": ["birds fly and flutter around", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wGKgwOP3h30", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["fly, flutter, around", "multiple, people, yell"], "captions_pred_video": ["of the pigeons in the coop", null], "captions_pred_audio": ["pigeons coo and flap their wings", "a crowd of people are talking and laughing"], "question": "which entity is more active", "label": 1}, {"captions": ["a toilet flushes and water drains", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sfAvvZwdLCY", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["water drains, flushes, water", "three men, wind, flow"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking 
while the wind is blowing and water is splashing"], "question": "which entity is about water flowing", "label": 1}, {"captions": ["a infant makes noise and is excited", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wIJK3-5y0kA", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["noise, excited, infant", "rustling, ducks, quack"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a duck quacks and a woman speaks"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man is filing a hard object", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vveS8HT7Uog", "sSMl2vc3ek"], "start_seconds": ["100", "20"], "properties": ["a man, hard, object", "loud, multiple, distance"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a person snoring loudly"], "question": "which entity is not a person?", "label": 0}, {"captions": ["water rushes by", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["x-PeY8Yb8M4", "zFjIWfSD-4"], "start_seconds": ["300", "410"], "properties": ["water, rushes, by", "People, motor, brakes"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", null], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a man speaks as crickets sing", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["ryFDPxgDOGc", "uYT5gxnyMWM"], "start_seconds": ["570", "50"], "properties": ["a, crickets, sing", "a, scream, girl"], "captions_pred_video": ["a 
group of people dressed in camouflage and hunting gear in the dark", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["sd7xVssqlw", "vbZ-0lGPneg"], "start_seconds": ["50", "30"], "properties": ["accelerates, tires, squealing", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a steam engine runs and whistles as it passes by", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["se87d6yxEOA", "xfaoyyzw2WU"], "start_seconds": ["10", "180"], "properties": ["run, whistle, pass", "loud, jet engine, roar"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a drill drills through something then people begin laughing"], "sample_ids": ["zALy31PjDl0", "tEE3MpBt1sg"], "start_seconds": ["21", "50"], "properties": ["a man, a vehicle, a horn", "drill, something, laugh"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "people are laughing breathing and speaking with 
background noise "], "question": "which entity is about a drill?", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "a child speaks in closed space"], "sample_ids": ["vlS6YMeWAPo", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["noise, bleat, call", "child, space, speak"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a goat bleats and birds chirp", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["some tunes played by whistling", "some tunes played by whistling"], "sample_ids": ["u6BnG6YZqJ4", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["tune, play, whistling", "tune, play, whistling"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a person whistling a song", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vr8ZXjEBhMQ", "uYT5gxnyMWM"], "start_seconds": ["150", "50"], "properties": ["wind, blow, zoom", "female, spraying, scream"], "captions_pred_video": ["is taken from a motorcycle's point of view", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "frogs croak and vocalize"], "sample_ids": ["y4tPJXBKDig", "yswmmRZFItk"], "start_seconds": ["20", "0"], "properties": ["a, noise, talk", "croak, vocalize, frog"], "captions_pred_video": ["footage 
of the woman wiping her nose with a tissue", "a close up of a frog in the water"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a frog is croaking"], "question": "which entity is a frog?", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "water flows and trickles"], "sample_ids": ["zliInBdC98Y", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["a, baby, cries, wails", "water, flow, trickle"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a baby cries and a woman speaks", "water is splashing and gurgling"], "question": "which entity is silent", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a woman speaks as she rubs two objects together"], "sample_ids": ["uWPRNLnpy7Y", "vzxHnu-SFEw"], "start_seconds": ["10", "80"], "properties": ["accelerate, laugh, vehicle", "two objects, woman, speak"], "captions_pred_video": ["is taken from a car driving down the street", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make 
a paper cup with scissors youtube how to"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["a man speaking with light rustling", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["zOZleIRqZm4", "s7knHCFW82w"], "start_seconds": ["80", "30"], "properties": ["light, rustling, man", "blow horn, get close, train"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a train is blowing its horn and its wheels are squealing "], "question": "which entity is moving", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vZAw4apG0Es", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["background, clock, ticktocks", "rooster, crow, background, men"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a clock in the background?", "label": 0}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "people speak as gunfire rings out"], "sample_ids": ["sjlVMgdGSK0", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["accelerates, vehicle, race car", "gunfire, ring, speak"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "of a woman shooting a gun at a target on the beach"], 
"captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "a telephone rings followed by a woman talking"], "sample_ids": ["vSeGhaZt-aI", "tGcFnX0GHI"], "start_seconds": ["50", "0"], "properties": ["water, bubbles, run", "ring, talk, woman"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a baby laugh at a sputter", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sLUnaPT5gM8", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["laugh, sputter, baby", "male, duck, laugh"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking and ducks are quacking"], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["wRBHTgrbiwg", "zl9Dqx-j7q4"], "start_seconds": ["50", "6"], "properties": ["bird, owl, speak", "engine, laugh, loud"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a car accelerates and wind blows", "winds blows roughly as a vehicle races past"], "sample_ids": ["u0TrcHhkPQ", "xjvTpk2Zpr8"], "start_seconds": ["20", "70"], "properties": ["accelerates, wind, blows", "wind, blows, vehicle"], 
"captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vZAw4apG0Es", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["people, clock, converse", "People, motor, brakes"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a clock ticking?", "label": 0}, {"captions": ["a jet engine spools up and takes off", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vBslzh7saPw", "ukg5L09Wpvo"], "start_seconds": ["90", "150"], "properties": ["engine, spools, takes", "clickety-clack, train, whistle"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man speaks as a machine runs", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vD6lYD1l0BY", "vfYTJq7nU"], "start_seconds": ["330", "130"], "properties": ["a, machine, run", "rustling, ducks, quack"], "captions_pred_video": ["game controller being held in the hands of the person", null], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a duck quacks and a woman speaks"], "question": "which entity is about a 
machine running?", "label": 0}, {"captions": ["a person is snoring while sleeping", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vJrjSeP17yE", "vb1fPSDI4c"], "start_seconds": ["40", "30"], "properties": ["a person is sleeping, snoring, person", "multiple, people, yell"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a diesel truck engine runs continuously", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sZvwOuuPGP0", "tdWhHV3X25Q"], "start_seconds": ["50", "60"], "properties": ["engine, diesel, truck", "applause, audience, yells"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a medium engine is running ", "a man is speaking and a crowd is clapping"], "question": "which is not a vehicle", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "an infant crying as a woman laughs"], "sample_ids": ["siJFXfGWgDk", "xhmRY9yhC7c"], "start_seconds": ["50", "20"], "properties": ["man, woman, vehicle", "a, laugh, infant"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a baby cries and a woman speaks"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a girl talking, laughing and sneezing noise", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["y4tPJXBKDig", "w5W5Kqtc8E"], "start_seconds": ["20", "100"], "properties": ["a, noise, talk", "wind, blow, vehicle"], "captions_pred_video": ["footage of the 
woman wiping her nose with a tissue", null], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "water is sprayed across a hard surface"], "sample_ids": ["zcDwZ6W7E3E", "sQwlkXjQabo"], "start_seconds": ["180", "10"], "properties": ["man, speak, motorcycles", "water, spray, surface"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "spraying followed by silence"], "question": "which is a liquid", "label": 1}, {"captions": ["a man talks as several small engines run", "a motor runs in the distance as a soft wind periodically gusts"], "sample_ids": ["u9A6VZQCZpU", "xyL9F5VrjkE"], "start_seconds": ["30", "20"], "properties": ["a, man, talk", "wind, motor, distance"], "captions_pred_video": [null, "of a caterpillar truck loading logs into a trailer"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "the wind is blowing and a car is passing by "], "question": "which entity is about a motor?", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["x5cuQjOdM3E", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["cat, meows, young woman", "people, applaud, hoot"], "captions_pred_video": ["a black background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a stream of water runs briefly", "a man speaks as water trickles down a stream"], 
"sample_ids": ["x-PeY8Yb8M4", "sapQIQUhFc"], "start_seconds": ["300", "280"], "properties": ["stream, water, run", "water, stream, trickles"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", null], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking and a stream is flowing in the background "], "question": "which stream is running", "label": 0}, {"captions": ["some people speak", "several insects fly while two men talk"], "sample_ids": ["vbZ-0lGPneg", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "several, fly, men"], "captions_pred_video": ["of a man holding a baby duck in his hands", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a woman and man speak while food is frying", "people cheer as a vehicle engine revs"], "sample_ids": ["zk-xJGQU8-4", "xjhAnI2q6hM"], "start_seconds": ["130", "6"], "properties": ["food, man, woman", "engine revs, vehicle, people"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "people speak as gunfire rings out"], "sample_ids": ["tDVADusiIoc", "wqTCwqVRDlk"], "start_seconds": ["60", "80"], "properties": ["water, radio, man", "gunfire, ring, speak"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a woman shooting a gun 
at a target on the beach"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war zone", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vJ7JPEFhyLA", "xfaoyyzw2WU"], "start_seconds": ["16", "180"], "properties": ["three men, wind, flow", "loud, jet engine, roar"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a male speaks and another male speaks", "water splashes as an animal walks through"], "sample_ids": ["viuTg1M-dqg", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["two males, speaking, male", "animal, water, splashes"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "some tunes played by whistling"], "sample_ids": ["yI-KvObbDoY", "u6BnG6YZqJ4"], "start_seconds": ["260", "0"], "properties": ["sound, smack, wind", "tune, play, whistling"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a person whistling a song"], "question": "which entity is a musical 
instrument", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["vBslzh7saPw", "tDlysoZiA1I"], "start_seconds": ["90", "0"], "properties": ["engine, roar, louder", "animal, grunts, chirps"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a jet engine roars and accelerates ", "birds are chirping and a rooster is crowing "], "question": "which entity is quieter", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "water pouring and bubbling"], "sample_ids": ["tDVADusiIoc", "uyRfq-jKPpo"], "start_seconds": ["60", "50"], "properties": ["wind, radio, waves", "water, bubbles, pouring"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["running water in a faucet with some clinks", "a small airplane approaches and then flies by, after and during which a 
boy speaks"], "sample_ids": ["zNRChLjqcU", "zj2R0XoFr5k"], "start_seconds": ["220", "50"], "properties": ["water, faucet, run", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["water is running from a faucet into a sink", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a helicopter engine runs continuously", "wind blows as people chatter quietly"], "sample_ids": ["ugHJF0hfYkg", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["engine, running, continuously", "wind, chatter, people"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage is blurry and out of focus"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "a vehicle accelerates and squeals tires"], "sample_ids": ["xM4joTqDVp4", "yRx9txMcBl0"], "start_seconds": ["160", "40"], "properties": ["background, chirp, birds", "accelerates, tires, squeals"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["birds are chirping and a 
train is moving ", "a car is revving its engine and skidding "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "paper is crumpling consistently"], "sample_ids": ["x9JovgqUcs", "v5cSxLaHADY"], "start_seconds": ["500", "0"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man speaks and types on a keyboard", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["multiple insects buzz over rustling wind", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tMJne1a4AFI", "uZesmtKZGSw"], "start_seconds": ["0", "250"], "properties": ["wind, buzz, rustling", "men, talk, cars"], "captions_pred_video": ["a swarm of bees on the ground", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "an adult male speaks and dials a rotary phone"], "sample_ids": ["y2bVZ7rz-5M", "tK4VlLsNxak"], "start_seconds": ["280", "120"], "properties": ["engine, horn, siren", "An adult male 
speaks, dials, and speaks into a rotary phone"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking and using a sewing machine"], "question": "which entity is a vehicle", "label": 0}, {"captions": ["male speech with light ticking", "a child speaks in closed space"], "sample_ids": ["xO-Q2BlIIPU", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["male, speech, ticking", "child, space, speak"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a drill runs and two people laugh", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["tEE3MpBt1sg", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["two people, laugh, drill", "a, scream, girl"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity has more people", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "a loud engine muffles a man as he speaks"], "sample_ids": ["wjsXBsc7M40", "xyx6eNVEYRY"], "start_seconds": ["10", "380"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "loud, engine, muffles"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "footage of a helicopter landing on a runway at an 
airport"], "captions_pred_audio": ["a baby laughs and a woman speaks", "an aircraft engine is running and a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vddP56-ogds", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["water, splash, person, laugh", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking with wind noise in the background "], "question": "which entity is more likely to be in a storm", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a vehicle accelerates and squeals tires"], "sample_ids": ["vh30P49Po6s", "yRx9txMcBl0"], "start_seconds": ["30", "40"], "properties": ["loud, continuous, quacks", "accelerates, tires, squeals"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a duck is quacking loudly", "a car is revving its engine and skidding "], "question": "which entity is louder", "label": 0}, {"captions": ["someone whistles a tune", "a train engine runs and a horn blows"], "sample_ids": ["sIXTftIuUgw", "zPX9o1uDiI"], "start_seconds": ["90", "40"], "properties": 
["someone, tune, whistle", "engine, horn, run"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person whistling a song", "a train moves with its horn blowing and wheels squealing "], "question": "which entity is a train", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "wind blows and women speak as livestock vocalizes"], "sample_ids": ["wvKpEYswXO0", "vXlk0lIQBFo"], "start_seconds": ["150", "470"], "properties": ["water, tap, run", "wind, speak, vocalize"], "captions_pred_video": ["of the person preparing food in the kitchen", "- a woman and two donkeys in a fenced in area"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "wind chimes are ringing and people are speaking and laughing "], "question": "which entity is about water?", "label": 0}, {"captions": ["wind blows and women speak as livestock vocalizes", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vXlk0lIQBFo", "uEU-Hg5MTN8"], "start_seconds": ["470", "27"], "properties": ["wind, speak, vocalize", "a woman, laughs, animal"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a woman is speaking and a baby is crying"], "question": "which entity is about a woman speaking and an animal snorting?", "label": 1}, {"captions": ["a man speaking with light rustling", "winds blows roughly as a vehicle races past"], "sample_ids": ["zOZleIRqZm4", "xjvTpk2Zpr8"], "start_seconds": ["80", "70"], "properties": ["light, rustling, man", "wind, blows, vehicle"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a jet engine roars and wind blows "], 
"question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["a dog barks and whimpers", "a child speaks in closed space"], "sample_ids": ["sShpyu2l4YQ", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["barks, whimpers, dog", "child, space, speak"], "captions_pred_video": ["the puppies are playing with a toy", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["distant humming of an engine", "a infant makes noise and is excited"], "sample_ids": ["yVPZ2MNWpms", "wIJK3-5y0kA"], "start_seconds": ["0", "30"], "properties": ["sound, distance, engine", "noise, excited, infant"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a car is driving by on the road ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a propeller moves loudly nearby", "some clanking with distant murmuring"], "sample_ids": ["ugHJF0hfYkg", "uMTTDZ2mb4"], "start_seconds": ["10", "30"], "properties": ["loud, propeller, move", "clanking, murmuring, distant"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "people are talking and a car is driving by with wind noise in the background "], "question": "which is quieter", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "someone whistles a tune"], "sample_ids": ["siJFXfGWgDk", "sIXTftIuUgw"], "start_seconds": ["50", "90"], "properties": ["man, woman, vehicle", "someone, tune, whistle"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", null], "captions_pred_audio": ["a man is 
speaking and birds are chirping in the background ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a baby cries and a woman moans", "a woman speaks as she rubs two objects together"], "sample_ids": ["smDKStoHBJo", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["a, cry, woman", "two objects, woman, speak"], "captions_pred_video": ["a man holding a crying baby in his arms", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a woman speaking?", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a man speaks as a motor runs in the background"], "sample_ids": ["w5W5Kqtc8E", "xZepNM9qcRA"], "start_seconds": ["100", "30"], "properties": ["wind, engine, scream", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man speaks 
while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "someone is typing on a computer keyboard"], "sample_ids": ["smDKStoHBJo", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["a, talk, baby, cry", "keyboard, type, computer"], "captions_pred_video": ["a man holding a crying baby in his arms", "how to make money on youtube in spanish"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["v0x1odnXtP0", "zj2R0XoFr5k"], "start_seconds": ["210", "50"], "properties": ["keyboard, type, computer", "airplane, boy, fly"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman speaks while a helicopter flies overhead "], "question": "which object is moving", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "wind blows as people chatter quietly"], "sample_ids": ["wyllXV6PjKo", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["a kid, talk, cry", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["wyllXV6PjKo", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["a kid, talk, cry", "a, scream, girl"], 
"captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman is speaking and a baby is crying"], "question": "which entity has a kid?", "label": 0}, {"captions": ["waves crash against a shoreline and people speak", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yFB25fqfU8I", "vbZ-0lGPneg"], "start_seconds": ["300", "30"], "properties": ["wave, crash, shoreline", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a person surfing in the ocean", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a woman is speaking and a dog is whimpering"], "question": "which entity is a video of a television program?", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["slZLHwNbbt4", "tDVADusiIoc"], "start_seconds": ["300", "60"], "properties": ["train, horn, sound", "water, radio, man"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "birds chirp and objects are moved around"], "sample_ids": ["zgUgkpk78xU", "yPUYU6t3rwo"], "start_seconds": ["70", "370"], "properties": ["horn, bells, ring", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 
1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a duck quacks continuously"], "sample_ids": ["v7jJS8aAyA", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["wind, blows, loudly", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a toilet flushes and water drains"], "sample_ids": ["tDlfY3nmx1A", "sfAvvZwdLCY"], "start_seconds": ["160", "20"], "properties": ["applause, laugh, man", "water drains, flushes, water"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a toilet is flushed"], "question": "which entity is a draining water?", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "wind blows as people chatter quietly"], "sample_ids": ["sQwlkXjQabo", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["liquid, surface, spray", "wind, chatter, people"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage is blurry and out of focus"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a male speaks 
and another male speaks", "someone is burping continuously"], "sample_ids": ["viuTg1M-dqg", "y636gklDioE"], "start_seconds": ["30", "20"], "properties": ["two males, speaking, male", "burps, burps, burps"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "a dog sitting on a red chair in front of an old telephone"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a person burps loudly several times"], "question": "which entity is more likely to be a prank", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "small dogs yip and bark sharply"], "sample_ids": ["xzKKf9bKNUo", "v-wcQf4BDY0"], "start_seconds": ["10", "120"], "properties": ["background, noise, snoring", "bark, yip, sharply"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a person snoring loudly", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["an insect buzzes around continuously", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["v25l1jef3JY", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["buzzes, continuously, insect", "a woman, a television program, a bird"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a woman is speaking and a dog is whimpering"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a door slams shut and an object moves on a hard surface", "water pouring and bubbling"], "sample_ids": ["zkKdxzNC97Y", "uyRfq-jKPpo"], "start_seconds": ["27", "50"], "properties": ["hard, surface, door", "water, bubbles, pouring"], "captions_pred_video": 
["footage of the door opening and closing in slow motion", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a door is opened and closed", "water is running from a faucet"], "question": "which entity is more likely to be a liquid", "label": 1}, {"captions": ["some clanking with distant murmuring", "water flows as men speak and yell"], "sample_ids": ["uMTTDZ2mb4", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["clanking, murmuring, distant", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["soTOh3zYJfY", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["vehicle, skid, tires", "men, talk, cars"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a infant makes noise and is excited", "some men converse over an engine running"], "sample_ids": ["wIJK3-5y0kA", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["noise, excited, infant", "men, converse, engine"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more quiet", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "some men converse over an engine running"], "sample_ids": ["ukxt9I7eMMg", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["food, pan, cook", "men, converse, engine"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a conversation?", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "a car accelerates and wind blows"], "sample_ids": ["xM4joTqDVp4", "u0TrcHhkPQ"], "start_seconds": ["160", "20"], "properties": ["background, chirp, birds", "accelerates, wind, blows"], "captions_pred_video": ["footage is of a train station with a train on the tracks and 
smoke coming out of the train's smokestack", null], "captions_pred_audio": ["birds are chirping and a train is moving ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "an engine runs loudly"], "sample_ids": ["t69a8aRKhmc", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["a, b, c", "loud, engine, run"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a lawn mower is running and men are speaking "], "question": "which entity is quieter", "label": 0}, {"captions": ["an adult male speaks and dials a rotary phone", "some men converse over an engine running"], "sample_ids": ["tK4VlLsNxak", "sCiy7QS1U"], "start_seconds": ["120", "300"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "men, converse, engine"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation between two men?", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "an airplane engine roars increasingly louder"], "sample_ids": ["vKrYfzleLB8", "vBslzh7saPw"], "start_seconds": ["110", "90"], "properties": ["a, ring, gunshots", "engine, roar, louder"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a jet engine roars and accelerates "], "question": "which is louder", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "music plays and 
someone speaks before gunfire and an explosion occurs"], "sample_ids": ["w1mlz3Pe4fU", "xKB8O8LTs6s"], "start_seconds": ["300", "70"], "properties": ["vocalize, chirp, continuously", "music, gunfire, explosion"], "captions_pred_video": ["of a bird in a cage", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["birds are chirping and singing", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["children speak as a female ask them questions", "several insects fly while two men talk"], "sample_ids": ["wEBlkGWVWwE", "s-T9OVOiMLo"], "start_seconds": ["260", "330"], "properties": ["female, speak, questions", "several, fly, men"], "captions_pred_video": ["shows a person writing on the whiteboard", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "people speak as gunfire rings out"], "sample_ids": ["ujMt0-D-x2k", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["snoring, rhythmical, nearby", "gunfire, ring, speak"], "captions_pred_video": ["of the dog playing with a toy on the floor", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["ukxt9I7eMMg", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["food, pan, cook", "a woman, laughs, animal"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of a 
girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is about a woman speaking and laughing?", "label": 1}, {"captions": ["birds fly and flutter around", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["wGKgwOP3h30", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["fly, flutter, around", "sheep, baa, birds"], "captions_pred_video": ["of the pigeons in the coop", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["pigeons coo and flap their wings", "a goat bleats and birds chirp"], "question": "which entity is a sheep?", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "some men converse over an engine running"], "sample_ids": ["ylpYOorfH4o", "sCiy7QS1U"], "start_seconds": ["410", "300"], "properties": ["motor, run, steady", "men, converse, engine"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man speaks while a motorcycle revs and accelerates "], "question": "which is a more active scene", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a small airplane approaches and then flies by, after 
and during which a boy speaks"], "sample_ids": ["vzxHnu-SFEw", "zj2R0XoFr5k"], "start_seconds": ["80", "50"], "properties": ["two objects, woman, speak", "airplane, boy, fly"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which object is moving", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "vehicles pass by on a roadway"], "sample_ids": ["yYEVLuqEytU", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["grunt, slurp, background", "pass, vehicle, roadway"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage of a fire truck entering a garage"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the 
background", "an infant crying as a woman laughs"], "sample_ids": ["ziUT9IFTkjg", "xhmRY9yhC7c"], "start_seconds": ["10", "20"], "properties": ["background, birds, rustling", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a person sniffles and sneezes", "a man speaks in the background while a slow tick repeats"], "sample_ids": ["uRlbY6aoBU", "vZAw4apG0Es"], "start_seconds": ["0", "30"], "properties": ["sneezes, sniffles, person", "background, tick, repeat"], "captions_pred_video": [null, "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a man is sneezing ", "a clock is ticking and people are talking"], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "a man speaks then blows a vehicle horn as wind blows"], "sample_ids": ["wz7N8YRy74I", "zALy31PjDl0"], "start_seconds": ["30", "21"], "properties": ["rooster, crow, background, men", "a man, a vehicle, a horn"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "a motorcycle is parked on the side of a brick walkway"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking and a car horn is honking"], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a man speaks as a car is passing by"], "sample_ids": ["tDlysoZiA1I", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["animal, grunts, chirps", "a, car, pass"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a car speeding up in the distance"], "sample_ids": ["vdoxuJn9lTc", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["burp, loud, girl", "distance, car, speed"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", null], "captions_pred_audio": ["a child speaks followed by a burp", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["water flows as men speak and yell", "paper folding and crinkling"], "sample_ids": ["vJ7JPEFhyLA", "zPpG3RD8lSs"], "start_seconds": ["16", "20"], "properties": ["water, flow, men", "paper, fold, crinkle"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card 
out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "the wind blows and a mouse clicks "], "question": "which entity is more still", "label": 1}, {"captions": ["a toilet flushes and water drains", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sfAvvZwdLCY", "vfYTJq7nU"], "start_seconds": ["20", "130"], "properties": ["water drains, flushes, water", "rustling, ducks, quack"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a duck quacks and a woman speaks"], "question": "which entity is about water?", "label": 0}, {"captions": ["a airplane flies overhead as a woman speaks", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zj2R0XoFr5k", "wDVMhEdTiVw"], "start_seconds": ["50", "30"], "properties": ["airplane, fly, woman", "gun, shoot, water"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a child speaks", "a machine beeps continuously"], "sample_ids": ["yW6FWLSLkx4", "y682ml90jGw"], "start_seconds": ["40", "11"], "properties": ["a, child, speaks", "beeps, machine, continuously"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", null], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a beeping sound is being 
made "], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "multiple people speak and children yell while water gurgles"], "sample_ids": ["uiS58TNyUiw", "vb1fPSDI4c"], "start_seconds": ["430", "30"], "properties": ["audio, man, speaking", "multiple, people, yell"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["wqADXCzngMw", "vbZ-0lGPneg"], "start_seconds": ["340", "30"], "properties": ["engine, idle, man", "a woman, a television program, a bird"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a man talking?", "label": 0}, {"captions": ["race cars go around a track as a man commentates", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["uZesmtKZGSw", "xV7Mg1QucSc"], "start_seconds": ["250", "14"], "properties": ["car, track, man", "alarm, ticktocks, laughs"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "an alarm clock ticks and a woman laughs"], "question": "which entity has a man laugh?", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "people speak as gunfire rings out"], "sample_ids": ["uZesmtKZGSw", "wqTCwqVRDlk"], "start_seconds": ["250", "80"], "properties": ["men, talk, cars", "gunfire, ring, speak"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["zuua6-5goWw", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["birds, chirp, quiet, man, speaks", "harsh, wind, blows"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "on how to use a sewing machine youtube"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 0}, {"captions": ["people speak and tapping occurs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tFCUUGdREgA", "uYT5gxnyMWM"], "start_seconds": ["70", "50"], "properties": ["people, tap, speak", "female, spraying, scream"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking and tapping?", "label": 0}, {"captions": ["a beep occurs briefly", "water pouring and bubbling"], "sample_ids": ["xtWeJ56-U-g", "uyRfq-jKPpo"], "start_seconds": ["20", "50"], "properties": ["beep, occur, briefly", "water, bubbles, pouring"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "water is running from a faucet"], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["a man is filing a hard object", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vveS8HT7Uog", "vYkA3cfXp5Q"], "start_seconds": ["100", "30"], "properties": ["a man, hard, object", "engine, accelerate, idle"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "an engine is idling"], "question": "which object is more likely to be a vehicle engine?", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wRBHTgrbiwg", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["bird, owl, speak", "a woman, laughs, animal"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a woman is speaking and a baby is crying"], "question": "which entity has a man speaking briefly?", "label": 0}, {"captions": ["water splashes and a door squeaks", "someone is typing on a computer keyboard"], "sample_ids": ["sdXV-ylviw", "v0x1odnXtP0"], "start_seconds": ["190", "210"], "properties": ["sound, splash, door", 
"keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "pigeons vocalize and birds chirp"], "sample_ids": ["vs65y4qmyBE", "uiS58TNyUiw"], "start_seconds": ["340", "430"], "properties": ["wind, blows, strongly", "vocalize, bird, chirp"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "of the pigeon in the cage"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["tDVADusiIoc", "sLUnaPT5gM8"], "start_seconds": ["60", "0"], "properties": ["wind, radio, waves", "loud, laughter, intermittent"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["running water in a faucet with some clinks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zNRChLjqcU", "uEU-Hg5MTN8"], "start_seconds": ["220", "27"], "properties": ["water, faucet, run", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["water is running from a faucet into a sink", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["music 
plays and repeated slaps accompany human sniveling, then insect buzz", "three men talk while wind blows and some liquid flows"], "sample_ids": ["weDbePuc-Xc", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["music, slaps, human", "three men, wind, flow"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more liquid flowing", "label": 1}, {"captions": ["a dog barks and whimpers", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sShpyu2l4YQ", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["barks, whimpers, dog", "airplane, boy, fly"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a dog is barking and growling", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "an insect buzzes around continuously"], "sample_ids": ["uJV8NDaHqqk", "v25l1jef3JY"], "start_seconds": ["100", "0"], "properties": ["loud, fly, chirp", "buzzes, continuously, insect"], "captions_pred_video": ["a bee hive in a wooden box", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a swarm of bees buzzing around", "a fly is buzzing around a microphone "], "question": "which fly buzzes around loudly", "label": 0}, {"captions": ["paper folding and crinkling", "a man is snoring loudly and repeatedly"], "sample_ids": ["zPpG3RD8lSs", "sncRqQ67iJU"], "start_seconds": ["20", "460"], "properties": ["paper, fold, crinkle", "loud, repeatedly, 
man"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a person is snoring"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["zcDwZ6W7E3E", "y8WEcpOlT3I"], "start_seconds": ["180", "40"], "properties": ["a, man, speak", "harsh, wind, blows"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking with wind noise in the background "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a train horn blows as it passes by", "an engine runs loudly"], "sample_ids": ["zVacuqSb4LI", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["horn, blows, train", "loud, engine, run"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a lawn mower is running and men are speaking "], "question": "which train is louder", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "continuous chugging with birds chirping in the background"], "sample_ids": ["sWZzXuWYY", "xM4joTqDVp4"], "start_seconds": ["420", "160"], "properties": ["male, clanks, thumps", "background, chirp, birds"], "captions_pred_video": [null, "footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "birds are chirping and a train is moving "], "question": "which entity has a male speaking?", "label": 0}, {"captions": ["a toilet flushes and water sputters as it drains", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["smGI3C1NZc", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["water, drain, toilet", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a toilet is flushed", "a woman speaks while a helicopter flies 
overhead "], "question": "which entity is a moving object", "label": 1}, {"captions": ["motors runs briefly and tires screech", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yRx9txMcBl0", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["motors, tires, screech", "People, motor, brakes"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running and air brakes hissing?", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a clock ticktocks"], "sample_ids": ["wRBHTgrbiwg", "v-g-j2uTByM"], "start_seconds": ["50", "30"], "properties": ["bird, owl, speak", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "a propeller rotates loudly and intensely"], "sample_ids": ["uYT5gxnyMWM", "ugHJF0hfYkg"], "start_seconds": ["50", "10"], "properties": ["female, spraying, scream", "loud, intense, propeller"], "captions_pred_video": ["footage of a 
person spraying paint on the ceiling", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xSKJGCItUWE", "zl9Dqx-j7q4"], "start_seconds": ["10", "6"], "properties": ["engine, work, child", "engine, laugh, loud"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tOj4tdLRaA", "zj2R0XoFr5k"], "start_seconds": ["70", "50"], "properties": ["woman, laugh, baby", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a person speaking and a baby laughing?", "label": 0}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a car speeding up in the distance"], "sample_ids": ["w8uLijTqtlU", "u0TrcHhkPQ"], "start_seconds": ["70", "20"], "properties": ["wind, microphone, noise", "distance, car, speed"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the 
background", "a woman speaks as she rubs two objects together"], "sample_ids": ["yYEVLuqEytU", "vzxHnu-SFEw"], "start_seconds": ["40", "80"], "properties": ["animal, pig, background", "two objects, woman, speak"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a child speaks", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["yW6FWLSLkx4", "sLUnaPT5gM8"], "start_seconds": ["40", "0"], "properties": ["a, child, speaks", "loud, laughter, intermittent"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, 
{"captions": ["multiple ducks quack continuously", "a stream of water runs briefly"], "sample_ids": ["wfHeoPDLMaM", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["multiple, quack, continuously", "stream, water, run"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["ducks are quacking", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["sd7xVssqlw", "yDoT73BWsdA"], "start_seconds": ["50", "10"], "properties": ["accelerates, tires, squealing", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a race car accelerates and revs its engine "], "question": "which vehicle is moving faster", "label": 0}, {"captions": ["a person whistles a meandering tune", "a stream of water runs briefly"], "sample_ids": ["uFoga8sHpiw", "x-PeY8Yb8M4"], "start_seconds": ["90", "300"], "properties": 
["person, tune, whistle", "stream, water, run"], "captions_pred_video": ["footage of a bird in a cage", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a person whistles a song", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a person sniffles and then sneezes in the distance", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["uRlbY6aoBU", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["a, distance, sneeze", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is sneezing ", "a man is speaking and ducks are quacking"], "question": "which entity is a person?", "label": 0}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "a stream of water runs briefly"], "sample_ids": ["wnpJndXuxLc", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["beeps, loud, whistle", "stream, water, run"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "a stream of water runs briefly"], "sample_ids": ["vms5XGTDVQc", "x-PeY8Yb8M4"], "start_seconds": ["220", "300"], "properties": ["paper, crumpled, crinkled", "stream, water, run"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["paper is crumpled and crinkled", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "frogs croak and vocalize"], "sample_ids": ["vmrxwuAMb2I", 
"yswmmRZFItk"], "start_seconds": ["40", "0"], "properties": ["a dog, inhales, exhales", "croak, vocalize, frog"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "a close up of a frog in the water"], "captions_pred_audio": ["a dog barks and growls", "a frog is croaking"], "question": "which animal is vocalizing", "label": 1}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "a infant makes noise and is excited"], "sample_ids": ["smDKStoHBJo", "wIJK3-5y0kA"], "start_seconds": ["0", "30"], "properties": ["a, infant, speaking", "noise, excited, infant"], "captions_pred_video": ["a man holding a crying baby in his arms", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a baby cries and a woman speaks"], "question": "which infant is making noise", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "water flows as men speak and yell"], "sample_ids": ["y2ZBGpgbhHM", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["birds, tweet, pant", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing?", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wqN6IIHw3po", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["rain, surface, fall", "engine, idle, woman"], "captions_pred_video": ["in your own words what is happening in this screenshot? 
blood splattered all over the place", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and water is splashing", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a person sniffs and sneezes", "multiple birds chirp and an animal grunts"], "sample_ids": ["uRlbY6aoBU", "tDlysoZiA1I"], "start_seconds": ["0", "0"], "properties": ["sneezes, person, sniffs", "animal, grunt, multiple"], "captions_pred_video": [null, "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a man is sneezing ", "birds are chirping and a rooster is crowing "], "question": "which entity is a person", "label": 0}, {"captions": ["a person snoring", "people applaud and hoot and chat quietly"], "sample_ids": ["t8tv5YRMJUg", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["a person, snore, loud", "people, applaud, hoot"], "captions_pred_video": ["of a man getting his face licked by another man", null], "captions_pred_audio": ["a person sniffs and breathes heavily", "people are clapping and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["wqADXCzngMw", "uZesmtKZGSw"], "start_seconds": ["340", "250"], "properties": ["engine, idle, man", "men, talk, cars"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about a man talking to an engine?", "label": 0}, {"captions": ["a vehicle engine runs and someone speaks", "wind blowing followed by a zoom"], "sample_ids": ["zF8yoL0rkbI", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["engine, run, someone", "wind, blow, zoom"], "captions_pred_video": ["footage of the traffic on the street at night", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["birds chirp and wind blows", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sxIvBMSavMQ", "yajyRTUQk3U"], "start_seconds": ["210", "400"], "properties": ["birds, chirp, wind", "a woman, something, fried"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a woman is speaking 
while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "bees buzz as wind blows"], "sample_ids": ["zsLxS-uLJTw", "tMJne1a4AFI"], "start_seconds": ["20", "0"], "properties": ["horn, blast, train", "bees, buzz, wind"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "a swarm of bees on the ground"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a swarm of bees buzzing around"], "question": "which entity is buzzing", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "vehicles pass by on a roadway"], "sample_ids": ["zgUgkpk78xU", "tgbONvsP47Y"], "start_seconds": ["70", "0"], "properties": ["horn, bells, ring", "pass, vehicle, roadway"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a baby laugh at a sputter", "wind blows as people chatter quietly"], "sample_ids": ["sLUnaPT5gM8", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["laugh, sputter, baby", "wind, chatter, people"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", 
"a airplane flies overhead as a woman speaks"], "sample_ids": ["uWAAAL4CIoc", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["a woman, chirps, animal", "airplane, fly, woman"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is more likely to be flying", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "a man speaks as birds chirp and a vehicle passes nearby"], "sample_ids": ["sapQIQUhFc", "siJFXfGWgDk"], "start_seconds": ["280", "50"], "properties": ["water, stream, trickles", "a, bird, vehicle"], "captions_pred_video": [null, "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man is speaking and birds are chirping in the background "], "question": "which entity is about a man speaking as water trickles down a stream?", "label": 0}, {"captions": ["a horn blasts as warning bells ring", "a muffled toilet flushes and the water drains"], "sample_ids": ["zgUgkpk78xU", "sfAvvZwdLCY"], "start_seconds": ["70", "20"], "properties": ["horn, bells, ring", "flushes, drains, water"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a toilet is flushed"], "question": "which entity is silent", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "music plays and someone speaks before gunfire 
and an explosion occurs"], "sample_ids": ["yswmmRZFItk", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["background, frog, croak", "music, gunfire, explosion"], "captions_pred_video": ["a close up of a frog in the water", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a frog is croaking", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["an infant crying frantically", "paper folding and crinkling"], "sample_ids": ["zwOBqeFTgiU", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["cry, infant, frantically", "paper, fold, crinkle"], "captions_pred_video": ["of the baby crying in the car seat", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a baby cries loudly", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["a person speaks briefly", "plastic is tapped on while someone speaks"], "sample_ids": ["zOZleIRqZm4", "wvKpEYswXO0"], "start_seconds": ["80", "150"], "properties": ["person, talk, brief", "plastic, tap, speak"], "captions_pred_video": ["a 
person picking berries from the bushes in the garden", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is a person talking?", "label": 0}, {"captions": ["a clock ticktocks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["v-g-j2uTByM", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["ticktocks, clock, ticktocks", "wind, blow, vehicle"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", null], "captions_pred_audio": ["a clock is ticking loudly", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is not a clock?", "label": 1}, {"captions": ["someone whistles a song", "a duck quacks continuously"], "sample_ids": ["sIXTftIuUgw", "vh30P49Po6s"], "start_seconds": ["90", "30"], "properties": ["someone, song, whistle", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person whistling a song", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "a toilet flushes and a female speaks"], "sample_ids": ["vzceMbklWc", "yaln9y8I7ms"], "start_seconds": ["180", "230"], "properties": ["water, faucet, sink", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["water is running and a man is speaking", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 0}, {"captions": ["a toilet flushes and a female speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["yaln9y8I7ms", "su6FAOcOA8c"], "start_seconds": ["230", "4"], "properties": 
["female, flushes, toilet", "engine, idle, woman"], "captions_pred_video": ["footage is blurry and out of focus", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking while a toilet flushes?", "label": 0}, {"captions": ["an airplane accelerates briefly", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zjTG0gaGCUI", "yajyRTUQk3U"], "start_seconds": ["80", "400"], "properties": ["accelerates, airplane, briefly", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a woman is speaking while food is frying in the background"], "question": "which object is fried", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "a motorcycle engine is idling"], "sample_ids": ["x9JovgqUcs", "vZAqdHZ81yA"], "start_seconds": ["500", "180"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "engine, motorcycle, idling"], "captions_pred_video": [null, "a motorcycle is parked on the side of the road with its rear end facing the viewer"], "captions_pred_audio": ["a man speaks and types on a keyboard", "an engine is idling loudly"], "question": "which entity is a vehicle", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a telephone rings followed by a woman talking"], "sample_ids": ["vJvryTwuAV8", "tGcFnX0GHI"], "start_seconds": ["16", "0"], "properties": ["audience, cheer, man", "ring, talk, woman"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", null], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a 
conversation", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["wIvYjuR3nrg", "zl9Dqx-j7q4"], "start_seconds": ["9", "6"], "properties": ["birds, pigeons, vocalize", "engine, laugh, loud"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and cooing", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["xfudFO976zE", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["animal, bleats, cry", "two men, woman, birds"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is talking", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "an infant crying as a woman laughs"], "sample_ids": ["wRV8yMk886E", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["liquid, spray, nozzle", "a, laugh, infant"], "captions_pred_video": ["two cars are parked in a parking lot at night", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["wind blows strongly", "wind blows and women speak as livestock vocalizes"], "sample_ids": ["w8uLijTqtlU", "vXlk0lIQBFo"], "start_seconds": ["70", "470"], "properties": ["wind, blows, strongly", "wind, speak, vocalize"], "captions_pred_video": ["footage is blurry and shaky", "- a woman and two donkeys in a fenced in area"], "captions_pred_audio": ["the wind is blowing strongly", 
"wind chimes are ringing and people are speaking and laughing "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a young woman speaks over spraying and another person yells"], "sample_ids": ["shmR4OZtzqA", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["man, engine, idle", "person, spray, yell"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man speaks while a motor runs", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a person screams glaringly", "dishes cling together then a man begins to speak"], "sample_ids": ["xC8kbrKJmco", "sQGXqGcwOTc"], "start_seconds": ["0", "3"], "properties": ["glaringly, screams, person", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a goat is bleating ", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["su6FAOcOA8c", 
"uYT5gxnyMWM"], "start_seconds": ["4", "50"], "properties": ["engine, idle, woman", "female, spraying, scream"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a woman speaking?", "label": 0}, {"captions": ["a vehicle engine runs and wind blows before women yell", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["w5W5Kqtc8E", "vfYTJq7nU"], "start_seconds": ["100", "130"], "properties": ["wind, blow, vehicle", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["a baby cries and a woman moans", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["smDKStoHBJo", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["a, cry, woman", "airplane, boy, fly"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "a telephone rings followed by a woman talking"], "sample_ids": ["sDSppXIlJrs", "tGcFnX0GHI"], "start_seconds": ["27", "0"], "properties": ["microphone, water, wind", "ring, talk, woman"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", null], "captions_pred_audio": 
["the wind is blowing and water is splashing", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a recording", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "an aircraft engine runs as people speak"], "sample_ids": ["wqADXCzngMw", "wTjoRj1se3U"], "start_seconds": ["340", "390"], "properties": ["engine, idle, man", "engine, run, people"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a jet engine is running and people are talking"], "question": "which entity has a man talking to it?", "label": 0}, {"captions": ["water pouring and bubbling", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["uyRfq-jKPpo", "vfYTJq7nU"], "start_seconds": ["50", "130"], "properties": ["water, bubbles, pouring", "rustling, ducks, quack"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", null], "captions_pred_audio": ["water is running from a faucet", "a duck quacks and a woman speaks"], "question": "which entity is about water?", "label": 0}, {"captions": 
["a vehicle engine runs and someone speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["zF8yoL0rkbI", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["engine, run, someone", "cling, speak, dishes"], "captions_pred_video": ["footage of the traffic on the street at night", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "mechanisms are operating and water is splashing "], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "children cheer as a man speaks then an audience screams"], "sample_ids": ["w1mlz3Pe4fU", "vJvryTwuAV8"], "start_seconds": ["300", "16"], "properties": ["vocalize, chirp, continuously", "audience, cheer, man"], "captions_pred_video": ["of a bird in a cage", "a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd"], "captions_pred_audio": ["birds are chirping and singing", "a man is speaking and a crowd is shouting and whooping "], "question": "which entity is a group of people", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "running water in a faucet with some clinks"], "sample_ids": ["vqZuVbG6-HI", "zNRChLjqcU"], "start_seconds": ["130", "220"], "properties": ["background, male, female", "water, faucet, run"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "water is running from a faucet into a sink"], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as a car is passing by", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sK4u5T8hW78", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["a, car, pass", "three men, wind, flow"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more moving parts", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vSeGhaZt-aI", "sSMl2vc3ek"], "start_seconds": ["50", "20"], "properties": ["water, sink, talk", "loud, multiple, distance"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a door opens and closes", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vBHyYJ8pL0", "vYkA3cfXp5Q"], "start_seconds": ["2", "30"], "properties": ["open, close, door", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "an engine is idling"], "question": "which entity is a moving object", "label": 1}, {"captions": ["someone snores nearby", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["spJCm8tD9Zo", "zj2R0XoFr5k"], "start_seconds": ["90", "50"], "properties": ["someone snores, nearby, someone", "airplane, boy, fly"], "captions_pred_video": ["of a man 
laying on the ground with his mouth open", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person is snoring loudly", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "water flows as men speak and yell"], "sample_ids": ["y2bVZ7rz-5M", "vJ7JPEFhyLA"], "start_seconds": ["280", "16"], "properties": ["engine, horn, siren", "water, flow, men"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a group of people chatter and talk as multiple horns honk in the background"], "sample_ids": ["su6FAOcOA8c", "yLy-WycbVVE"], "start_seconds": ["4", "30"], "properties": ["engine, idle, woman", "background, people, talk"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a soccer field in a stadium with yellow and red seats"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking and a church bell is ringing with wind noise in the background "], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a door slams shut and an object moves on a hard surface", "a man speaks in the background while a slow tick repeats"], "sample_ids": ["zkKdxzNC97Y", "vZAw4apG0Es"], "start_seconds": ["27", "30"], "properties": ["hard, surface, door", "background, tick, repeat"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "a clock made out of wood and gears with birds on top of it"], 
"captions_pred_audio": ["a door is opened and closed", "a clock is ticking and people are talking"], "question": "which entity is more calm", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a man speaks as a car is passing by"], "sample_ids": ["yPUYU6t3rwo", "sK4u5T8hW78"], "start_seconds": ["370", "30"], "properties": ["birds chirp, objects are moved around, birds", "a, car, pass"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["insects buzz and a man speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["tDlfY3nmx1A", "ukg5L09Wpvo"], "start_seconds": ["160", "150"], "properties": ["applause, laugh, man", "clickety-clack, train, whistle"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "dishes cling together then a man begins to speak"], "sample_ids": ["uEU-Hg5MTN8", "sQGXqGcwOTc"], "start_seconds": ["27", 
"3"], "properties": ["animal, grunts, snorts", "cling, speak, dishes"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "mechanisms are operating and water is splashing "], "question": "which entity is about a woman speaking and laughing and an animal grunts and snorts?", "label": 0}, {"captions": ["a person sniffs and sneezes", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["uRlbY6aoBU", "y8WEcpOlT3I"], "start_seconds": ["0", "40"], "properties": ["sneezes, person, sniffs", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking with wind noise in the background "], "question": "which entity is more likely to be a person", "label": 0}, {"captions": ["a vehicle engine accelerating then running on idle", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vYkA3cfXp5Q", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["engine, accelerate, idle", "a woman, laughs, animal"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["an engine is idling", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["u7C-AEBQM", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["ticks, rhythmic, quiet", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a ticktock of a clock", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more 
violent", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "water flows as men speak and yell"], "sample_ids": ["vs65y4qmyBE", "vJ7JPEFhyLA"], "start_seconds": ["340", "16"], "properties": ["engine, run, man", "water, flow, men"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["children speak as a female ask them questions", "water is sprayed across a hard surface"], "sample_ids": ["wEBlkGWVWwE", "sQwlkXjQabo"], "start_seconds": ["260", "10"], "properties": ["female, speak, questions", "water, spray, surface"], "captions_pred_video": ["shows a person writing on the whiteboard", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["someone is burping continuously", "a car speeding up in the distance"], "sample_ids": ["y636gklDioE", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["burps, burps, burps", "distance, car, speed"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", null], "captions_pred_audio": ["a person burps loudly several times", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["several insects fly while two men talk", "some tunes played by whistling"], "sample_ids": ["s-T9OVOiMLo", "u6BnG6YZqJ4"], "start_seconds": ["330", "0"], "properties": ["several, fly, men", "tune, play, whistling"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "a young boy standing 
in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "a car accelerates and wind blows"], "sample_ids": ["tDVADusiIoc", "u0TrcHhkPQ"], "start_seconds": ["60", "20"], "properties": ["man, radio, blows", "accelerates, wind, blows"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vfYTJq7nU", "tiDFTC-5vU"], "start_seconds": ["130", "30"], "properties": ["rustling, ducks, quack", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a duck quacks and a woman speaks", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking as others laugh?", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "paper folding and crinkling"], "sample_ids": ["tK4VlLsNxak", "zPpG3RD8lSs"], "start_seconds": ["120", "20"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "paper, fold, crinkle"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece 
of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["bees buzz and wind blows", "a machine engine runs and a man speaks"], "sample_ids": ["tMJne1a4AFI", "vs65y4qmyBE"], "start_seconds": ["0", "340"], "properties": ["bees buzz, wind blows, bees", "engine, run, man"], "captions_pred_video": ["a swarm of bees on the ground", "a car is engulfed in flames on the side of the road"], "captions_pred_audio": ["a swarm of bees buzzing around", "a heavy engine is running and men are speaking "], "question": "which entity is a machine?", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["xjhAnI2q6hM", "wDVMhEdTiVw"], "start_seconds": ["6", "30"], "properties": ["engine revs, vehicle, people", "gun, shoot, water"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a gun?", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "some tunes played by whistling"], "sample_ids": ["zuua6-5goWw", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["birds, 
chirp, quiet, man, speaks", "tune, play, whistling"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a person snores loudly multiple times at a close distance"], "sample_ids": ["uEU-Hg5MTN8", "sSMl2vc3ek"], "start_seconds": ["27", "20"], "properties": ["animal, grunts, snorts", "loud, multiple, distance"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "people cheer as a vehicle engine revs"], "sample_ids": ["v7jJS8aAyA", "xjhAnI2q6hM"], "start_seconds": ["10", "6"], "properties": ["wind, blows, loudly", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["birds chirp 
then an animal grunts", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["tDlysoZiA1I", "vlS6YMeWAPo"], "start_seconds": ["0", "40"], "properties": ["animal, grunt, chirp", "sheep, baa, birds"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a goat bleats and birds chirp"], "question": "which entity is a sheep?", "label": 1}, {"captions": ["a kid speaks followed by music playing", "water splashes as an animal walks through"], "sample_ids": ["tQWGZLItBXk", "w1ir-sZ3Im8"], "start_seconds": ["170", "90"], "properties": ["music, kid, speak", "animal, water, splashes"], "captions_pred_video": ["worms revolution screenshots", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sQGXqGcwOTc", "wqZ135Ssz0"], "start_seconds": ["3", "60"], "properties": ["audio, kid, giggles", "two men, woman, birds"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", null], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["an audience gives applause as a man yells and a group sings", "a woman speaks as she rubs two objects together"], "sample_ids": ["tdWhHV3X25Q", "vzxHnu-SFEw"], "start_seconds": ["60", "80"], "properties": ["applause, audience, yells", "two objects, woman, speak"], 
"captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["water splashes and a door squeaks", "several insects fly while two men talk"], "sample_ids": ["sdXV-ylviw", "s-T9OVOiMLo"], "start_seconds": ["190", "330"], "properties": ["sound, splash, door", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more flying insects", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "an infant crying frantically"], "sample_ids": ["x9JovgqUcs", "zwOBqeFTgiU"], "start_seconds": ["500", "30"], "properties": ["a, man, speaks, keyboard", "cry, infant, frantically"], "captions_pred_video": [null, 
"of the baby crying in the car seat"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["ziUT9IFTkjg", "zj2R0XoFr5k"], "start_seconds": ["10", "50"], "properties": ["background, birds, rustling", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a man speaks while water drains", "a child speaks in closed space"], "sample_ids": ["vSeGhaZt-aI", "yW6FWLSLkx4"], "start_seconds": ["50", "40"], "properties": ["water, drain, man", "child, space, speak"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a stream of water runs briefly", "a vehicle engine accelerating then running on idle"], "sample_ids": ["x-PeY8Yb8M4", "vYkA3cfXp5Q"], "start_seconds": ["300", "30"], "properties": ["stream, water, run", "engine, accelerate, idle"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a car is driving on a wet road ", "an engine is idling"], "question": "which entity is a source of water", "label": 0}, {"captions": ["a toilet flushes and a female 
speaks", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["yaln9y8I7ms", "rqu8iB22IY"], "start_seconds": ["230", "5"], "properties": ["female, flushes, toilet", "sound, repeats, laugh"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and a man speaks", "a dog barks and a man speaks while music plays "], "question": "which entity has a female speaking?", "label": 0}, {"captions": ["a telephone rings and a bird vocalizes", "a small musical boom and then birds tweet and a few dogs pant"], "sample_ids": ["skd2PphS6oI", "y2ZBGpgbhHM"], "start_seconds": ["190", "30"], "properties": ["ring, bird, vocalize", "birds, tweet, pant"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", null], "captions_pred_audio": ["a telephone bell rings repeatedly ", "birds chirping and a dog panting"], "question": "which entity has more birds", "label": 1}, {"captions": ["an insect buzzes around continuously", "a propeller rotates loudly and intensely"], "sample_ids": ["v25l1jef3JY", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["buzzes, continuously, insect", "loud, intense, propeller"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a stream of water runs briefly"], "sample_ids": ["w5W5Kqtc8E", "x-PeY8Yb8M4"], "start_seconds": ["100", "300"], "properties": ["wind, engine, scream", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a car is driving on a wet road "], "question": 
"which entity is a stream of water?", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a vehicle engine revs and tires squeal"], "sample_ids": ["zl9Dqx-j7q4", "yDoT73BWsdA"], "start_seconds": ["6", "10"], "properties": ["motors rev, laugh, loudly", "engine revs, tires squeal, vehicle"], "captions_pred_video": ["footage of a man driving a car in the dark", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a jet engine roars ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vJvryTwuAV8", "wz7N8YRy74I"], "start_seconds": ["16", "30"], "properties": ["audience, cheer, man", "rooster, crow, background, men"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "someone is typing on a computer keyboard"], "sample_ids": ["xNMovAf3o50", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["rain, thunder, music", "keyboard, type, computer"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "how to make money on youtube in spanish"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a person is typing on a keyboard"], "question": "which entity is a person", "label": 1}, {"captions": ["a person snoring", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["t8tv5YRMJUg", 
"uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["a person, snore, loud", "female, spraying, scream"], "captions_pred_video": ["of a man getting his face licked by another man", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a woman is speaking and a baby is crying"], "question": "which entity is louder", "label": 1}, {"captions": ["an engine runs loudly", "an airplane engine runs"], "sample_ids": ["vqZuVbG6-HI", "yVPZ2MNWpms"], "start_seconds": ["130", "0"], "properties": ["loud, engine, run", "engine, airplane, runs"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a car is driving by on the road "], "question": "which entity is running", "label": 1}, {"captions": ["someone snores nearby", "a man speaks as a car is passing by"], "sample_ids": ["spJCm8tD9Zo", "sK4u5T8hW78"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "a, car, pass"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "water flows as men speak and yell"], "sample_ids": ["xNMovAf3o50", "vJ7JPEFhyLA"], 
"start_seconds": ["0", "16"], "properties": ["rain, thunder, music", "water, flow, men"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "people speak as gunfire rings out"], "sample_ids": ["vzceMbklWc", "wqTCwqVRDlk"], "start_seconds": ["180", "80"], "properties": ["water, faucet, sink", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["water is running and a man is speaking", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "vehicles pass by on a roadway"], "sample_ids": ["sapQIQUhFc", "tgbONvsP47Y"], "start_seconds": ["280", "0"], "properties": ["water, stream, trickles", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a infant makes noise and is excited", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wIJK3-5y0kA", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["noise, excited, infant", "engine, accelerate, idle"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a baby cries and a woman speaks", "an engine is idling"], "question": "which entity is a machine", "label": 1}, {"captions": ["dog barking and 
vehicle engine idling followed shortly by vehicle engine revving", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["zY3icUyMdh8", "zFjIWfSD-4"], "start_seconds": ["20", "410"], "properties": ["dog, bark, engine", "People, motor, brakes"], "captions_pred_video": ["footage of a bus driving through a residential street at night", null], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a vehicle?", "label": 0}, {"captions": ["a man talks as something metal hits against and glass is set down", "a telephone rings followed by a woman talking"], "sample_ids": ["x6ijhqRY38s", "tGcFnX0GHI"], "start_seconds": ["250", "0"], "properties": ["something metal, glass, hit", "ring, talk, woman"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a woman speaks with water running", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wTideSjRFS0", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["water, running, woman", "music, gunfire, explosion"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more action", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a train horn blows as it passes by"], "sample_ids": ["zofjfKhqLk8", "zVacuqSb4LI"], "start_seconds": ["10", "30"], "properties": ["noise, stop, motor", "horn, 
blows, train"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "water pouring and bubbling"], "sample_ids": ["vSeGhaZt-aI", "uyRfq-jKPpo"], "start_seconds": ["50", "50"], "properties": ["water, bubbles, run", "water, bubbles, pouring"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how 
to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "water is running from a faucet"], "question": "which entity has more water", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "a toilet flushes and water drains"], "sample_ids": ["t8CV69hcvF0", "sfAvvZwdLCY"], "start_seconds": ["210", "20"], "properties": ["person, sneeze, follow", "water drains, flushes, water"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a woman sneezes and speaks", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a infant makes noise and is excited", "birds vocalize and chirp continuously"], "sample_ids": ["wIJK3-5y0kA", "w1mlz3Pe4fU"], "start_seconds": ["30", "300"], "properties": ["noise, excited, infant", "vocalize, chirp, continuously"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "of a bird in a cage"], "captions_pred_audio": ["a baby cries and a woman speaks", "birds are chirping and singing"], "question": "which entity is more vocal", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "a infant makes noise and is excited"], "sample_ids": ["voJh2gJxXhA", "wIJK3-5y0kA"], "start_seconds": ["50", "30"], "properties": ["music, frog, croak", "noise, excited, infant"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["vehicles pass by on a roadway", "people speak as gunfire rings out"], "sample_ids": ["tgbONvsP47Y", "wqTCwqVRDlk"], 
"start_seconds": ["0", "80"], "properties": ["pass, vehicle, roadway", "gunfire, ring, speak"], "captions_pred_video": ["footage of a fire truck entering a garage", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a car is driving on the road ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["a woman sneezes then speaks", "a man speaks in the background while a slow tick repeats"], "sample_ids": ["x4dZyf9Gbj0", "vZAw4apG0Es"], "start_seconds": ["130", "30"], "properties": ["sneezes, speaks, woman", "background, tick, repeat"], "captions_pred_video": ["footage is blurry and out of focus", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a woman sneezes and speaks", "a clock is ticking and people are talking"], "question": "which entity is a man speaking in the background?", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["w34HjHr6gAY", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["beeps, squawk, child speaking", "a woman, a television program, a bird"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a woman is speaking and a dog is whimpering"], "question": "which entity has a 
bird?", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a car speeding up in the distance"], "sample_ids": ["xC8kbrKJmco", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["background, goat, scream", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a goat is bleating ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "small dogs yip and bark sharply"], "sample_ids": ["wRBHTgrbiwg", "v-wcQf4BDY0"], "start_seconds": ["50", "120"], "properties": ["bird, owl, speak", "bark, yip, sharply"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "pigeons vocalize and birds chirp"], "sample_ids": ["xV7Mg1QucSc", "uiS58TNyUiw"], "start_seconds": ["14", "430"], "properties": ["alarm, ticktocks, laughs", "vocalize, bird, chirp"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "of the pigeon in the cage"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a vehicle engine accelerating then running on idle"], "sample_ids": ["y8dSeubCNI", "vYkA3cfXp5Q"], "start_seconds": ["4", "30"], "properties": ["men, women, car", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["an engine revving and people talking in the background", "an engine is 
idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "some tunes played by whistling"], "sample_ids": ["uEU-Hg5MTN8", "u6BnG6YZqJ4"], "start_seconds": ["27", "0"], "properties": ["animal, grunts, snorts", "tune, play, whistling"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a toilet flushes and water drains", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sfAvvZwdLCY", "y8WEcpOlT3I"], "start_seconds": ["20", "40"], "properties": ["water drains, flushes, water", "harsh, wind, blows"], "captions_pred_video": ["footage of the toilet in the bathroom", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking with wind noise in the background "], "question": "which entity is a source of water", "label": 0}, {"captions": ["some liquid flows while a woman laughs and man talks", "people cheer as a vehicle engine revs"], "sample_ids": ["vddP56-ogds", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["liquid, laughs, man", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a truck is revving its engine and a man is speaking "], "question": "which entity has more people", "label": 1}, {"captions": ["birds tweet and squawk", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["w1mlz3Pe4fU", "zl9Dqx-j7q4"], "start_seconds": ["300", "6"], "properties": ["squawk, tweet, scream", "engine, laugh, loud"], "captions_pred_video": ["of a bird in a 
cage", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and singing", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a woman speaks happily and an animal chirps"], "sample_ids": ["zofjfKhqLk8", "uWAAAL4CIoc"], "start_seconds": ["10", "0"], "properties": ["background, metal, clank", "a woman, chirps, animal"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a baby laugh at a sputter", "water flows and trickles"], "sample_ids": ["sLUnaPT5gM8", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["laugh, sputter, baby", "water, flow, trickle"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["siJFXfGWgDk", "uZesmtKZGSw"], "start_seconds": ["50", "250"], "properties": ["a, bird, vehicle", "men, talk, cars"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet 
holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["sWZzXuWYY", "uEU-Hg5MTN8"], "start_seconds": ["420", "27"], "properties": ["male, clanks, thumps", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity has a higher pitch", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["wTideSjRFS0", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["food, sizzle, woman", "two men, woman, birds"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yeFvk9x0wWI", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["clack, bird, chirp", "rooster, crow, background, men"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of the sun shining through the clouds on a cloudy day"], 
"captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["people speak as gunfire rings out", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["wqTCwqVRDlk", "y8WEcpOlT3I"], "start_seconds": ["80", "40"], "properties": ["gunfire, ring, speak", "harsh, wind, blows"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "music plays and animals vocalize as a cartoon character makes sounds"], "sample_ids": ["xl2PIWyXaM", "weDbePuc-Xc"], "start_seconds": ["160", "40"], "properties": ["chirp, man, younger person", "cartoon character, music, vocalize"], "captions_pred_video": [null, "a cartoon frog and a butterfly are sitting on the ground next to each other"], "captions_pred_audio": ["birds are chirping and people are talking", "a man is speaking and birds are chirping with a frog croaking in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sG7TyPnFDR0", "sLUnaPT5gM8"], "start_seconds": ["180", "0"], "properties": ["beeps, machine, smoke alarm", "loud, laughter, intermittent"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is 
more quiet", "label": 0}, {"captions": ["a child babbles as a woman speaks", "a man speaks as a machine runs"], "sample_ids": ["wEBlkGWVWwE", "vD6lYD1l0BY"], "start_seconds": ["260", "330"], "properties": ["a, babble, woman", "a, machine, run"], "captions_pred_video": ["shows a person writing on the whiteboard", "game controller being held in the hands of the person"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking and dishes are being washed "], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "wind blows and a vehicle blows a hard then a train blows a horn"], "sample_ids": ["zALy31PjDl0", "wnpJndXuxLc"], "start_seconds": ["21", "50"], "properties": ["a man, a vehicle, a horn", "blows, vehicle, train"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity has a train blowing a horn?", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a man speaks as a motor runs in the background"], "sample_ids": ["zofjfKhqLk8", "xZepNM9qcRA"], "start_seconds": ["10", "30"], "properties": ["background, metal, clings", "background, motor, run"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "pigeons vocalize and birds chirp"], 
"sample_ids": ["wvKpEYswXO0", "uiS58TNyUiw"], "start_seconds": ["150", "430"], "properties": ["sound, water, running", "vocalize, bird, chirp"], "captions_pred_video": ["of the person preparing food in the kitchen", "of the pigeon in the cage"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["uZesmtKZGSw", "su6FAOcOA8c"], "start_seconds": ["250", "4"], "properties": ["car, track, man", "engine, idle, woman"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a door slams shut and an object moves on a hard surface", "a child speaks in closed space"], "sample_ids": ["zkKdxzNC97Y", "yW6FWLSLkx4"], "start_seconds": ["27", "40"], "properties": ["hard, surface, door", "child, space, speak"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a door is 
opened and closed", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ugHJF0hfYkg", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["engine, running, continuously", "female, spraying, scream"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "a dog barks and whimpers"], "sample_ids": ["xzKKf9bKNUo", "sShpyu2l4YQ"], "start_seconds": ["10", "0"], "properties": ["background, noise, snoring", "barks, whimpers, dog"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "the puppies are playing with a toy"], "captions_pred_audio": ["a person snoring loudly", "a dog is barking and growling"], "question": "which entity is more active", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a vehicles accelerate quickly and someone laughs"], "sample_ids": ["rwtmaKiCcQU", "uWPRNLnpy7Y"], "start_seconds": ["30", "10"], "properties": ["nozzle, depressed, spray can", "accelerate, laugh, vehicle"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "is taken from a car driving down the street"], "captions_pred_audio": ["spraying and people speaking", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["some men converse over an engine running", "paper is crumpling consistently"], "sample_ids": ["sCiy7QS1U", "v5cSxLaHADY"], "start_seconds": ["300", "0"], "properties": ["men, converse, 
engine", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["shmR4OZtzqA", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["man, engine, idle", "airplane, boy, fly"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man speaks while a motor runs", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a wooden clack accompanies nearby chirping birds", "a toilet flushes and a female speaks"], "sample_ids": ["yeFvk9x0wWI", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["clack, bird, chirp", "female, flushes, toilet"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage is blurry and out of focus"], "captions_pred_audio": ["birds chirp in the 
background as a car drives by ", "a toilet flushes and a man speaks"], "question": "which entity is accompanied by a female speaking?", "label": 1}, {"captions": ["a man talks while vehicles pass by", "an airplane engine runs"], "sample_ids": ["sK4u5T8hW78", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["a, man, talk", "engine, airplane, runs"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a car is driving by on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "pigeons vocalize and birds chirp"], "sample_ids": ["sxYkFKFIZD0", "uiS58TNyUiw"], "start_seconds": ["20", "430"], "properties": ["screech, man, door", "vocalize, bird, chirp"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["water splashes as an animal walks through", "an infant crying as a 
woman laughs"], "sample_ids": ["w1ir-sZ3Im8", "xhmRY9yhC7c"], "start_seconds": ["90", "20"], "properties": ["animal, water, splashes", "a, laugh, infant"], "captions_pred_video": ["footage of a group of people riding horses through a river", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["water splashes and gurgles as people speak", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a vehicle engine revs and tires squeal"], "sample_ids": ["xBxDz0CFVn0", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["stream, water, flow", "engine revs, tires squeal, vehicle"], "captions_pred_video": ["footage is blurry and out of focus", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a child speaks in closed space"], "sample_ids": ["zofjfKhqLk8", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["background, metal, clank", "child, space, speak"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "an electric engine works nearby followed by a child talking"], "sample_ids": ["wz7N8YRy74I", "xSKJGCItUWE"], "start_seconds": ["30", "10"], "properties": ["rooster, crow, background, people", "engine, work, child"], 
"captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a high pitched engine is running and a child speaks"], "question": "which entity has a child talking?", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a man speaks as a car is passing by"], "sample_ids": ["sQwlkXjQabo", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["water, spray, surface", "a, car, pass"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["some men converse over an engine running", "an emergency vehicle engine runs then a horn blows and siren sounds"], "sample_ids": ["sCiy7QS1U", "y2bVZ7rz-5M"], "start_seconds": ["300", "280"], "properties": ["men, converse, engine", "engine, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["an infant crying frantically", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zwOBqeFTgiU", "wqZ135Ssz0"], "start_seconds": ["30", "60"], 
"properties": ["cry, infant, frantically", "two men, woman, birds"], "captions_pred_video": ["of the baby crying in the car seat", null], "captions_pred_audio": ["a baby cries loudly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a human", "label": 1}, {"captions": ["goats bleat and people speak", "several insects fly while two men talk"], "sample_ids": ["z5iUE5h0EPs", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["goats bleat, people speak, language", "several, fly, men"], "captions_pred_video": ["of the goat in the barn", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a goat bleats and a man speaks", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["vehicles pass by on a roadway", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tgbONvsP47Y", "vfYTJq7nU"], "start_seconds": ["0", "130"], "properties": ["pass, vehicle, roadway", "rustling, ducks, quack"], "captions_pred_video": ["footage of a fire truck entering a garage", null], "captions_pred_audio": ["a car is driving on the road ", "a duck quacks and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a toilet flushes and water drains", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sfAvvZwdLCY", "yajyRTUQk3U"], "start_seconds": ["20", "400"], "properties": ["water drains, flushes, water", "a woman, something, fried"], "captions_pred_video": ["footage of the toilet in the bathroom", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a person talking?", "label": 1}, {"captions": ["an infant crying 
and a woman speaking with some distant murmuring", "wind blowing followed by a zoom"], "sample_ids": ["smDKStoHBJo", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["a, infant, speaking", "wind, blow, zoom"], "captions_pred_video": ["a man holding a crying baby in his arms", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a person is snoring while sleeping", "a stream of water runs briefly"], "sample_ids": ["vJrjSeP17yE", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["a person is sleeping, snoring, person", "stream, water, run"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a person snoring loudly", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a person is snoring while sleeping", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vJrjSeP17yE", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["a person is sleeping, snoring, person", "People, motor, brakes"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a person?", "label": 0}, {"captions": ["continuous snoring", "dogs bark as an engine runs and a person whistles"], "sample_ids": ["sLkeqCDJIyw", "zY3icUyMdh8"], "start_seconds": ["120", "20"], "properties": ["loud, snoring, noise", "dog, bark, engine"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a person is snoring loudly", "a car is driving and dogs are barking and squealing "], "question": "which entity is louder", "label": 1}, {"captions": ["a person speaks briefly", "an insect buzzes around continuously"], "sample_ids": ["zOZleIRqZm4", "v25l1jef3JY"], "start_seconds": ["80", "0"], "properties": ["person, talk, brief", "buzzes, continuously, insect"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a fly is buzzing around a microphone "], "question": "which entity is not a person?", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sZPuqDgX2V0", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["commentator, race, track", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "an infant crying as a woman laughs"], "sample_ids": ["w5W5Kqtc8E", "xhmRY9yhC7c"], "start_seconds": ["100", "20"], "properties": ["wind, blow, vehicle", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a motorcycle engine is idling", "dog barking and vehicle engine idling followed shortly by vehicle engine revving"], 
"sample_ids": ["vZAqdHZ81yA", "zY3icUyMdh8"], "start_seconds": ["180", "20"], "properties": ["engine, motorcycle, idling", "dog, bark, engine"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["an engine is idling loudly", "a car is driving and dogs are barking and squealing "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "pigeons vocalize and birds chirp"], "sample_ids": ["zkKdxzNC97Y", "uiS58TNyUiw"], "start_seconds": ["27", "430"], "properties": ["hard, surface, door", "vocalize, bird, chirp"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of the pigeon in the cage"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["an airplane engine runs", "pigeons vocalize and birds chirp"], "sample_ids": ["yVPZ2MNWpms", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["engine, airplane, runs", "vocalize, bird, chirp"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "of the pigeon in the cage"], "captions_pred_audio": ["a car is driving by on the road ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "water flows and trickles"], "sample_ids": ["xKB8O8LTs6s", "tB7hWb9gTuQ"], "start_seconds": ["70", "30"], "properties": ["music, radio, gunshots", "water, flow, trickle"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are 
fired ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a man speaks as a car is passing by"], "sample_ids": ["vcmWSmvti8", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["music, man, fire", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a man speaking as a car passes by?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["vimzuGQvdcU", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["a, man, yells", "background, birds, rustling"], "captions_pred_video": ["a group of people are rafting down a river", null], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["people speak as gunfire rings out", "wind blowing followed by a zoom"], "sample_ids": ["wqTCwqVRDlk", "vr8ZXjEBhMQ"], "start_seconds": ["80", "150"], "properties": ["gunfire, ring, speak", "wind, blow, zoom"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and a gun is 
fired", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more calm", "label": 1}, {"captions": ["a small engine spits as it runs", "two women and a man talk while a kid cries"], "sample_ids": ["sZvwOuuPGP0", "wyllXV6PjKo"], "start_seconds": ["50", "30"], "properties": ["spits, engine, runs", "a kid, talk, cry"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", null], "captions_pred_audio": ["a medium engine is running ", "a woman speaks and a baby cries"], "question": "which entity is a person", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["wy1eKjR7KC0", "tDlysoZiA1I"], "start_seconds": ["30", "0"], "properties": ["people, talk, distance", "animal, grunts, chirps"], "captions_pred_video": ["two police officers riding motorcycles down the street", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a man is speaking and a siren is going off", "birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "a man speaks and is typing on a keyboard"], "sample_ids": ["ukg5L09Wpvo", "x9JovgqUcs"], "start_seconds": ["150", "500"], "properties": ["a train, a horn, a bell", "a, man, speaks, keyboard"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", null], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man speaks and types on a keyboard"], "question": "which entity is stationary", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "pigeons vocalize and birds chirp"], "sample_ids": ["w2JXXIAdUdg", "uiS58TNyUiw"], "start_seconds": ["10", "430"], "properties": ["snoring, distance, 
person", "vocalize, bird, chirp"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "of the pigeon in the cage"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "a man speaks followed by another man speaking outside"], "sample_ids": ["xjvTpk2Zpr8", "viuTg1M-dqg"], "start_seconds": ["70", "30"], "properties": ["wind, blows, vehicle", "two men, speak, follow"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a single person speaking?", "label": 0}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "people applaud and hoot and chat quietly"], "sample_ids": ["y2ZBGpgbhHM", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["birds, tweet, pant", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds chirping and a dog panting", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be a performance", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "water flows and trickles"], "sample_ids": ["w6RTHR6AeAg", "tB7hWb9gTuQ"], "start_seconds": ["40", "30"], "properties": ["call, owl, screech", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a man speaks as insects 
buzz and a bird chirps"], "sample_ids": ["vddP56-ogds", "t25U-v4k4ts"], "start_seconds": ["30", "40"], "properties": ["water, flow, laugh", "a, chirps, bird"], "captions_pred_video": [null, "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking and bees are buzzing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "pigeons vocalize and birds chirp"], "sample_ids": ["w34HjHr6gAY", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["beeps, squawk, child speaking", "vocalize, bird, chirp"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "of the pigeon in the cage"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a man speaks as a motor runs in the background"], "sample_ids": ["vimzuGQvdcU", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["a, man, yells", "background, motor, run"], "captions_pred_video": ["a group of people are rafting down a river", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the 
background?", "label": 1}, {"captions": ["people clap and speak in the distance", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wwyfGO2J4", "xKB8O8LTs6s"], "start_seconds": ["90", "70"], "properties": ["clap, distance, speak", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "people applaud and hoot and chat quietly"], "sample_ids": ["tDlysoZiA1I", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["animal, grunt, multiple", "people, applaud, hoot"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", null], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["smDKStoHBJo", "ukg5L09Wpvo"], "start_seconds": ["0", "150"], "properties": ["a, infant, speaking", "a train, a horn, a bell"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "a woman speaks happily and an animal chirps"], "sample_ids": ["zj2R0XoFr5k", "uWAAAL4CIoc"], "start_seconds": ["50", "0"], "properties": ["airplane, fly, woman", "a woman, chirps, animal"], "captions_pred_video": ["footage of a 
small airplane flying in the sky stock videos and royalty-free footage", null], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking and a dog is barking "], "question": "which entity is a bird?", "label": 1}, {"captions": ["a male speaks over some small clicks", "wind blows as people chatter quietly"], "sample_ids": ["uXxVebHsGZ8", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["male, clicks, speak", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "a woman speaks happily and an animal chirps"], "sample_ids": ["wztCSUxOf8", "uWAAAL4CIoc"], "start_seconds": ["130", "0"], "properties": ["a crowd, yells, applauds", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vhJWZheqaE", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["water drains unevenly, toilet flushes, water drains", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a toilet is flushed", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a jet engine spools up and takes off", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vBslzh7saPw", "sSMl2vc3ek"], "start_seconds": ["90", "20"], "properties": ["engine, spools, takes", "loud, multiple, distance"], "captions_pred_video": ["a pickup truck carrying a large 
object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a child speaks in closed space"], "sample_ids": ["xZepNM9qcRA", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["background, motor, run", "child, space, speak"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "a propeller rotates loudly and intensely"], "sample_ids": ["sYITalLZjj4", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["water, rushes, background, birds", "loud, intense, propeller"], "captions_pred_video": ["two ducks are swimming in the water near each other", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["wind blows and birds chirp", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sjlVMgdGSK0", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["accelerates, vehicle, race car", "a, scream, girl"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["music plays and animals vocalize as a 
cartoon character makes sounds", "an airplane engine runs"], "sample_ids": ["weDbePuc-Xc", "yVPZ2MNWpms"], "start_seconds": ["40", "0"], "properties": ["cartoon character, music, vocalize", "engine, airplane, runs"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a horn honks and then loudly blares", "an infant crying frantically"], "sample_ids": ["wnpJndXuxLc", "zwOBqeFTgiU"], "start_seconds": ["50", "30"], "properties": ["horn, honk, loud", "cry, infant, frantically"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "of the baby crying in the car seat"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a baby cries loudly"], "question": "which entity is louder", "label": 0}, {"captions": ["wind noise makes sound into a microphone", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["w8uLijTqtlU", "yajyRTUQk3U"], "start_seconds": ["70", "400"], "properties": ["wind, microphone, noise", "a woman, something, fried"], "captions_pred_video": ["footage is blurry and shaky", "- a woman cooking in the kitchen"], "captions_pred_audio": ["the wind is blowing strongly", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zl9Dqx-j7q4", "vfYTJq7nU"], "start_seconds": ["6", "130"], "properties": ["motors rev, laugh, loudly", "rustling, ducks, quack"], 
"captions_pred_video": ["footage of a man driving a car in the dark", null], "captions_pred_audio": ["a jet engine roars ", "a duck quacks and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "water flows as men speak and yell"], "sample_ids": ["slZLHwNbbt4", "vJ7JPEFhyLA"], "start_seconds": ["300", "16"], "properties": ["train, horn, sound", "water, flow, men"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a moving object", "label": 0}, {"captions": ["a clock ticks quietly and rhythmically", "some men converse over an engine running"], "sample_ids": ["u7C-AEBQM", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["ticks, rhythmic, quiet", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a ticktock of a clock", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more active", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["uPDn2BFTHk", "uYT5gxnyMWM"], "start_seconds": ["140", "50"], "properties": ["lady, laugh, baby", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity has a baby?", "label": 0}, {"captions": ["a person is snoring while sleeping", "small dogs yip and bark sharply"], "sample_ids": ["vJrjSeP17yE", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["a person is sleeping, snoring, person", "bark, yip, 
sharply"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a person snoring loudly", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "several insects fly while two men talk"], "sample_ids": ["su6FAOcOA8c", "s-T9OVOiMLo"], "start_seconds": ["4", "330"], "properties": ["engine, run, woman", "several, fly, men"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has a woman making an announcement?", "label": 0}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sxYkFKFIZD0", "wz7N8YRy74I"], "start_seconds": ["20", "30"], "properties": ["screech, man, door", "rooster, crow, background, men"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a stream of water flows quickly", "children speak and play together"], 
"sample_ids": ["wbHTKEJZyhc", "yVVP8XvWJTo"], "start_seconds": ["20", "260"], "properties": ["stream, water, flow", "children, speak, play"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "footage of a playground at a school or daycare center"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "children are speaking and breathing with background noise "], "question": "which entity is moving faster", "label": 0}, {"captions": ["an adult woman and an adult man speak", "a car accelerates and wind blows"], "sample_ids": ["zTLVJCo4WEE", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["two people, adult, speak", "accelerates, wind, blows"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and crickets chirp", "a race car accelerates and revs its engine "], "question": "which is not a 
person", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["yRx9txMcBl0", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["accelerates, tires, squeals", "clickety-clack, train, whistle"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "some tunes played by whistling"], "sample_ids": ["sHbXC6na9hg", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["a person, saw, wood", "tune, play, whistling"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["an engine is idling and vibrating", "a person whistling a song"], "question": "which entity is not a person?", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["xyL9F5VrjkE", "yDoT73BWsdA"], "start_seconds": ["20", "10"], "properties": ["wind, blows, vehicle", "engine, revs, vehicle"], "captions_pred_video": ["of a caterpillar truck 
loading logs into a trailer", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a race car accelerates and revs its engine "], "question": "which vehicle is moving", "label": 1}, {"captions": ["food is frying then a woman speaks", "a machine beeps continuously"], "sample_ids": ["ukxt9I7eMMg", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["food, woman, speak", "beeps, machine, continuously"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a door opens and closes", "a clock ticktocks"], "sample_ids": ["vBHyYJ8pL0", "v-g-j2uTByM"], "start_seconds": ["2", "30"], "properties": ["open, close, door", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a clock is ticking loudly"], "question": "which entity is a clock", "label": 1}, {"captions": ["dogs barking and whimpering", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["tIY7qOV3rEM", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["barking, whimpering, dog", "rooster, crow, background, men"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a bird", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": 
["xV7Mg1QucSc", "xKB8O8LTs6s"], "start_seconds": ["14", "70"], "properties": ["alarm, ticktocks, laughs", "music, gunfire, explosion"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a child yells and another yells", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vMDHu7Lxcgw", "wDVMhEdTiVw"], "start_seconds": ["410", "30"], "properties": ["two, yell, child", "gun, shoot, water"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["yRx9txMcBl0", "wDVMhEdTiVw"], "start_seconds": ["40", "30"], "properties": ["accelerates, tires, squeals", "gun, shoot, water"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a gun is 
fired followed by splashing and a person sneezing"], "question": "which entity is a gun", "label": 1}, {"captions": ["an engine runs loudly", "a man speaks and is typing on a keyboard"], "sample_ids": ["vqZuVbG6-HI", "x9JovgqUcs"], "start_seconds": ["130", "500"], "properties": ["loud, engine, run", "a, man, speaks, keyboard"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man speaks and types on a keyboard"], "question": "which entity is quieter", "label": 1}, {"captions": ["a person is snoring while sleeping", "people speak and tapping occurs"], "sample_ids": ["vJrjSeP17yE", "tFCUUGdREgA"], "start_seconds": ["40", "70"], "properties": ["a person is sleeping, snoring, person", "people, tap, speak"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a person riding a white horse in an indoor arena"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and walking with wind noise in the background "], "question": "which entity is a person", "label": 0}, {"captions": ["a child babbles as a woman speaks", "a clock ticktocks"], "sample_ids": ["wEBlkGWVWwE", "v-g-j2uTByM"], "start_seconds": ["260", "30"], "properties": ["a, babble, woman", "ticktocks, clock, ticktocks"], "captions_pred_video": ["shows a person writing on the whiteboard", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "soft movement is accompanied by clocks ticking in the background"], "sample_ids": ["zsLxS-uLJTw", "vlJS7LN2XyM"], "start_seconds": ["20", "30"], "properties": ["horn, blast, train", "background, clocks, ticking"], "captions_pred_video": ["footage of the train on the tracks at 
sunrise or sunset", "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a ticktock of a clock"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "a woman speaks happily and an animal chirps"], "sample_ids": ["vVhthZ45k3Y", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["cat, purr, hiss", "a woman, chirps, animal"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a woman is speaking and a dog is barking "], "question": "which entity is more likely to be a bird", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "water is sprayed across a hard surface"], "sample_ids": ["smGI3C1NZc", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["water, drain, toilet", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a toilet is flushed", "spraying followed by silence"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["yDoT73BWsdA", "uZesmtKZGSw"], "start_seconds": ["10", "250"], "properties": ["engine revs, tires squeal, vehicle", "men, talk, cars"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 
1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about cars?", "label": 1}, {"captions": ["a man speaks while water drains", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vSeGhaZt-aI", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["water, drain, man", "a, scream, girl"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a person sneezes followed by another person speaking"], "sample_ids": ["tOSWIURC-4", "t8CV69hcvF0"], "start_seconds": ["0", "210"], "properties": ["engine, work, nearby", "person, sneeze, follow"], "captions_pred_video": [null, "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a lawn mower is running ", "a woman sneezes and speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks and dog vocalizes", "some men converse over an engine running"], "sample_ids": ["uWAAAL4CIoc", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["a, dog, vocalize", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows 
louder", "a door opens and closes"], "sample_ids": ["slZLHwNbbt4", "vBHyYJ8pL0"], "start_seconds": ["300", "2"], "properties": ["clap, distance, horn", "open, close, door"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", null], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is more likely to be a door", "label": 1}, {"captions": ["speaking following by laughing and clapping", "an airplane engine runs"], "sample_ids": ["u2f5NpsoHBg", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["person, laugh, clap", "engine, airplane, runs"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a car is driving by on the road "], "question": "which entity is not a person?", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "an engine runs loudly"], "sample_ids": ["sQwlkXjQabo", "vqZuVbG6-HI"], "start_seconds": ["10", "130"], "properties": ["liquid, surface, spray", "loud, engine, run"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage is blurry because it's raining outside"], "captions_pred_audio": ["spraying followed by silence", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["people speak in a closed space", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sTpirNYo8vQ", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["people, space, speak", "a woman, something, fried"], "captions_pred_video": ["of a man taking a selfie on a bus", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", 
"a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vh30P49Po6s", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["loud, continuous, quacks", "a woman, something, fried"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "wind blowing followed by a zoom"], "sample_ids": ["sShpyu2l4YQ", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["growl, bark, yip", "wind, blow, zoom"], "captions_pred_video": ["the puppies are playing with a toy", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a dog is barking and growling", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["small dogs yip and bark sharply", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["v-wcQf4BDY0", "xfaoyyzw2WU"], "start_seconds": ["120", "180"], "properties": ["bark, yip, sharply", "loud, jet engine, roar"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a dog barks and growls", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["v0x1odnXtP0", "vlS6YMeWAPo"], "start_seconds": ["210", "40"], "properties": ["keyboard, type, computer", "sheep, baa, birds"], 
"captions_pred_video": ["how to make money on youtube in spanish", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a person is typing on a keyboard", "a goat bleats and birds chirp"], "question": "which entity is not a person?", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a infant makes noise and is excited"], "sample_ids": ["xjvTpk2Zpr8", "wIJK3-5y0kA"], "start_seconds": ["70", "30"], "properties": ["engine, run, wind", "noise, excited, infant"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "paper is crumpling consistently"], "sample_ids": ["skd2PphS6oI", "v5cSxLaHADY"], "start_seconds": ["190", "0"], "properties": ["ring, bird, vocalize", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaking with light rustling", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["zOZleIRqZm4", "yDoT73BWsdA"], "start_seconds": ["80", "10"], "properties": ["light, rustling, man", "engine, revs, vehicle"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a door slams shut and an object 
moves on a hard surface", "an infant crying frantically"], "sample_ids": ["zkKdxzNC97Y", "zwOBqeFTgiU"], "start_seconds": ["27", "30"], "properties": ["hard, surface, door", "cry, infant, frantically"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of the baby crying in the car seat"], "captions_pred_audio": ["a door is opened and closed", "a baby cries loudly"], "question": "which entity is a human", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "ducks quack as a man speaks and makes a duck sound"], "sample_ids": ["vVhthZ45k3Y", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["cat, purr, hiss", "ducks, quack, man"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a duck quacks and a woman speaks"], "question": "which entity is a man speaking to animals?", "label": 0}, {"captions": ["dogs bark as an engine runs and a person whistles", "pigeons vocalize and birds chirp"], "sample_ids": ["zY3icUyMdh8", "uiS58TNyUiw"], "start_seconds": ["20", "430"], "properties": ["dog, bark, engine", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "of the pigeon in the cage"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "paper folding and crinkling"], "sample_ids": ["wztCSUxOf8", "zPpG3RD8lSs"], "start_seconds": ["130", "20"], "properties": ["a crowd, yells, applauds", "paper, fold, crinkle"], "captions_pred_video": [null, "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of 
paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "the wind blows and a mouse clicks "], "question": "which is not a crowd", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "a car accelerates and wind blows"], "sample_ids": ["xjvTpk2Zpr8", "u0TrcHhkPQ"], "start_seconds": ["70", "20"], "properties": ["wind, blows, vehicle", "accelerates, wind, blows"], "captions_pred_video": ["footage of a dhl plane landing on the runway", null], "captions_pred_audio": ["a jet engine roars and wind blows ", "a race car accelerates and revs its engine "], "question": "which entity is more likely to be a car", "label": 1}, {"captions": ["a clock ticktocks briefly", "a car speeding up in the distance"], "sample_ids": ["u7C-AEBQM", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["ticktocks, clock, ticktocks briefly", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a ticktock of a clock", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "a clock ticktocks"], "sample_ids": ["wPz6QRAkEb4", "v-g-j2uTByM"], "start_seconds": ["60", "30"], "properties": ["chirps, tweets, song", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a bird in a cage on top of a pole", "in your own words a cuckoo clock hanging on 
the wall"], "captions_pred_audio": ["birds are chirping in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a man speaks as crickets sing"], "sample_ids": ["w5W5Kqtc8E", "ryFDPxgDOGc"], "start_seconds": ["100", "570"], "properties": ["wind, engine, scream", "a, crickets, sing"], "captions_pred_video": [null, "a group of people dressed in camouflage and hunting gear in the dark"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking with crickets chirping in the background"], "question": "which entity is quieter", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "a horn rings out as a machine runs by"], "sample_ids": ["wSVhSdj0F0", "slZLHwNbbt4"], "start_seconds": ["10", "300"], "properties": ["horn honks, keys jingle, slam", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["an engine runs loudly", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vqZuVbG6-HI", "wDVMhEdTiVw"], "start_seconds": ["130", "30"], "properties": ["loud, engine, run", "gun, shoot, water"], "captions_pred_video": ["footage is blurry because it's raining outside", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "three men talk while wind blows and some liquid 
flows"], "sample_ids": ["yajyRTUQk3U", "vJ7JPEFhyLA"], "start_seconds": ["400", "16"], "properties": ["a woman, something, fried", "three men, wind, flow"], "captions_pred_video": ["- a woman cooking in the kitchen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about cooking?", "label": 0}, {"captions": ["a clock ticks quietly and rhythmically", "a man speaks over intermittent keyboard taps"], "sample_ids": ["u7C-AEBQM", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["ticks, rhythmic, quiet", "audio, man, keyboard"], "captions_pred_video": [null, "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a ticktock of a clock", "a man speaks and types on a computer keyboard "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a cat meows as a young woman speaks", "people speak as gunfire rings out"], "sample_ids": ["x5cuQjOdM3E", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["cat, meows, young woman", "gunfire, ring, speak"], "captions_pred_video": 
["a black background with an airplane flying in the sky", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["plastic is tapped on while someone speaks", "music plays and animals vocalize as a cartoon character makes sounds"], "sample_ids": ["wvKpEYswXO0", "weDbePuc-Xc"], "start_seconds": ["150", "40"], "properties": ["plastic, tap, speak", "cartoon character, music, vocalize"], "captions_pred_video": ["of the person preparing food in the kitchen", "a cartoon frog and a butterfly are sitting on the ground next to each other"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking and birds are chirping with a frog croaking in the background "], "question": "which entity is a cartoon character?", "label": 1}, {"captions": ["women speak and laugh as wind blows", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["un9VQlzgZM", "zFjIWfSD-4"], "start_seconds": ["5", "410"], "properties": ["wind, speak, laugh", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "some tunes played by whistling"], "sample_ids": ["sOa7g-44Dag", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["background, man, spray", "tune, play, whistling"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and rubbing his hands together 
", "a person whistling a song"], "question": "which entity is playing tunes", "label": 1}, {"captions": ["a person snoring several times", "several insects fly while two men talk"], "sample_ids": ["spJCm8tD9Zo", "s-T9OVOiMLo"], "start_seconds": ["90", "330"], "properties": ["snore, person, several", "several, fly, men"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a person?", "label": 0}, {"captions": ["a clock alarm sounds and gears turn", "water splashes as an animal walks through"], "sample_ids": ["w2M4i1mklOA", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["alarm, gears, turn", "animal, water, splashes"], "captions_pred_video": ["footage of an antique clock", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a child speaks in closed space"], "sample_ids": ["su6FAOcOA8c", "yW6FWLSLkx4"], "start_seconds": ["4", "40"], "properties": ["engine, run, woman", "child, space, speak"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["someone is snoring while sleeping", "a helicopter engine runs continuously"], "sample_ids": ["ujMt0-D-x2k", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["snore, sleep, someone", 
"engine, running, continuously"], "captions_pred_video": ["of the dog playing with a toy on the floor", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a person is snoring loudly", "a helicopter is flying overhead "], "question": "which entity is not running continuously?", "label": 0}, {"captions": ["a helicopter engine runs continuously", "small dogs yip and bark sharply"], "sample_ids": ["ugHJF0hfYkg", "v-wcQf4BDY0"], "start_seconds": ["10", "120"], "properties": ["engine, running, continuously", "bark, yip, sharply"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a helicopter is flying overhead ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vbpKkWvfOu4", "vb1fPSDI4c"], "start_seconds": ["560", "30"], "properties": ["a, man, speaks", "multiple, people, yell"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "people applaud and hoot and chat quietly"], "sample_ids": ["zsLxS-uLJTw", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["horn, blast, train", "people, applaud, hoot"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", null], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "people are clapping 
and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["zj2R0XoFr5k", "vzxHnu-SFEw"], "start_seconds": ["50", "80"], "properties": ["airplane, fly, woman", "two objects, woman, speak"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which object is being rubbed together", "label": 0}, {"captions": ["here comes the train and it starts to blow the horn and get close", "a clock ticktocks"], "sample_ids": ["s7knHCFW82w", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["blow horn, get close, train", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "in your own words a cuckoo clock hanging on the wall"], 
"captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["continuous sneezing together with speech", "cats meow and then a person begins to talk while the cats continue to meow"], "sample_ids": ["x4dZyf9Gbj0", "x5cuQjOdM3E"], "start_seconds": ["130", "30"], "properties": ["continuous, sneeze, speech", "cat, talk, meow"], "captions_pred_video": ["footage is blurry and out of focus", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a woman sneezes and speaks", "a cat meows and a woman speaks"], "question": "which entity is a cat?", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "men speak and a nozzle sprays liquid"], "sample_ids": ["xM4joTqDVp4", "wRV8yMk886E"], "start_seconds": ["160", "0"], "properties": ["background, chirp, birds", "liquid, spray, nozzle"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "two cars are parked in a parking lot at night"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a man speaks followed by a loud burst"], "question": "which entity is more likely to be used in a science class", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["zhx6hoYrHeI", "w5W5Kqtc8E"], "start_seconds": ["160", "100"], "properties": ["engine, sputter, rough", "wind, blow, vehicle"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "wind loudly blowing while people speak in the background 
followed by a horn blowing"], "sample_ids": ["sapQIQUhFc", "xjhAnI2q6hM"], "start_seconds": ["280", "6"], "properties": ["water, stream, trickles", "wind, blow, loudly"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["someone is burping continuously", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["y636gklDioE", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["burps, burps, burps", "music, gunfire, explosion"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a person burps loudly several times", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "a man speaks followed by another man speaking outside"], "sample_ids": ["zj2R0XoFr5k", "viuTg1M-dqg"], "start_seconds": ["50", "30"], "properties": ["airplane, fly, overhead", "two men, speak, follow"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a male speaks and another male speaks", "water splashes and wind noise is made into a microphone"], "sample_ids": ["viuTg1M-dqg", "sDSppXIlJrs"], "start_seconds": ["30", "27"], "properties": ["two males, speaking, male", "microphone, water, wind"], 
"captions_pred_video": ["footage of water coming out of a hole in the ground", "a man is paddling a small wooden boat in the water"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "the wind is blowing and water is splashing"], "question": "which entity is not a person", "label": 1}, {"captions": ["a clock ticktocks briefly", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["u7C-AEBQM", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks briefly", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a clock?", "label": 0}, {"captions": ["wind blows strongly and a young man speaks", "small dogs yip and bark sharply"], "sample_ids": ["vs65y4qmyBE", "v-wcQf4BDY0"], "start_seconds": ["340", "120"], "properties": ["wind, blows, strongly", "bark, yip, sharply"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["someone snores nearby", "birds chirp and objects are moved around"], "sample_ids": ["spJCm8tD9Zo", "yPUYU6t3rwo"], "start_seconds": ["90", "370"], "properties": ["someone snores, nearby, someone", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a person is snoring loudly", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman 
talks while a baby cries and a man whispers", "a machine beeps continuously"], "sample_ids": ["smDKStoHBJo", "y682ml90jGw"], "start_seconds": ["0", "11"], "properties": ["a, talk, baby, cry", "beeps, machine, continuously"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["some clanking with distant murmuring", "birds chirp and objects are moved around"], "sample_ids": ["uMTTDZ2mb4", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["clanking, murmuring, distant", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a man speaks as water trickles down a stream"], "sample_ids": ["y2bVZ7rz-5M", "sapQIQUhFc"], "start_seconds": ["280", "280"], "properties": ["motor noise, horn, siren", "water, stream, trickles"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking and a stream is flowing in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["v7jJS8aAyA", "tiDFTC-5vU"], "start_seconds": ["10", "30"], "properties": ["wind, blows, loudly", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a man is speaking and ducks are 
quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "someone whistles a tune"], "sample_ids": ["uPDn2BFTHk", "sIXTftIuUgw"], "start_seconds": ["140", "90"], "properties": ["woman, laughs, speaks", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a person whistling a song"], "question": "which entity is a person", "label": 1}, {"captions": ["food is frying while a woman speaks", "water is sprayed across a hard surface"], "sample_ids": ["yhQ2Lg-7qDY", "sQwlkXjQabo"], "start_seconds": ["130", "10"], "properties": ["food, woman, speak", "water, spray, surface"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a faucet is running and a man is speaking", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "small dogs yip and bark sharply"], "sample_ids": ["zF8yoL0rkbI", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["engine, run, someone", "bark, yip, sharply"], "captions_pred_video": ["footage of the traffic on the street at night", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["children cry and people talk", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xLwHe825Zs", "zj2R0XoFr5k"], "start_seconds": ["18", "50"], "properties": ["people talk, children cry, people talk", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a 
baby cries and a woman speaks", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about a boy speaking?", "label": 1}, {"captions": ["a clock ticktocks", "vehicles pass by on a roadway"], "sample_ids": ["v-g-j2uTByM", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["ticktocks, clock, ticktocks", "pass, vehicle, roadway"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a clock is ticking loudly", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vW4x7S1VfQc", "tdWhHV3X25Q"], "start_seconds": ["150", "60"], "properties": ["clacking, oil, woman", "applause, audience, yells"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["food sizzles in a frying pan", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["s7knHCFW82w", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["blow horn, get close, train", "male, duck, laugh"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", null], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "wind blows as people chatter quietly"], "sample_ids": 
["y8dSeubCNI", "xBxDz0CFVn0"], "start_seconds": ["4", "30"], "properties": ["engine revving, people speaking, motorcycle", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["an engine revving and people talking in the background", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["x6ijhqRY38s", "ukg5L09Wpvo"], "start_seconds": ["250", "150"], "properties": ["something metal, glass, hit", "clickety-clack, train, whistle"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "three men talk while wind blows and some liquid flows"], "sample_ids": ["s6DESzUTGjY", "vJ7JPEFhyLA"], "start_seconds": ["16", "16"], "properties": ["wind, laugh, woman", "three men, wind, flow"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how 
to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a woman laughing", "label": 0}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["yZmhM1HcsyE", "xfaoyyzw2WU"], "start_seconds": ["4", "180"], "properties": ["engine, roar, water", "loud, jet engine, roar"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a duck quacks several times", "birds chirp and objects are moved around"], "sample_ids": ["vh30P49Po6s", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["quacks, duck, several", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a duck is quacking loudly", "insects buzz and a man speaks"], "question": "which entity is a bird", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ylpYOorfH4o", "uYT5gxnyMWM"], "start_seconds": ["410", "50"], "properties": ["motor, run, steady", "female, spraying, scream"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 
dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["w2bYrCVLT60", "xjvTpk2Zpr8"], "start_seconds": ["120", "70"], "properties": ["ducks, speak, quack", "wind, blows, vehicle"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a jet engine roars and wind blows "], "question": "which entity is more active", "label": 1}, {"captions": ["leaves rustle while man speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zOZleIRqZm4", "vfYTJq7nU"], "start_seconds": ["80", "130"], "properties": ["leaves, rustle, speak", "rustling, ducks, quack"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a duck quacks and a woman speaks"], "question": "which entity is about a duck?", "label": 1}, {"captions": ["birds chirp and objects are moved around", "paper is 
crumpling consistently"], "sample_ids": ["yPUYU6t3rwo", "v5cSxLaHADY"], "start_seconds": ["370", "0"], "properties": ["birds chirp, objects are moved around, birds", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["insects buzz and a man speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zTLVJCo4WEE", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["a, crickets, sing", "a woman, laughs, animal"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking and an animal snorting?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vimzuGQvdcU", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["a, man, yells", "clickety-clack, train, whistle"], "captions_pred_video": ["a group of people are rafting down a river", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a train blows its whistle and blows its horn "], "question": "which entity is quieter", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a woman speaks as she rubs two objects together"], "sample_ids": ["v0x1odnXtP0", "vzxHnu-SFEw"], "start_seconds": ["210", "80"], "properties": ["keyboard, type, computer", "two objects, woman, speak"], 
"captions_pred_video": ["how to make money on youtube in spanish", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman is speaking and breathing with mechanisms in the background "], "question": "which object is being rubbed together", "label": 0}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a man speaks as a car is passing by"], "sample_ids": ["uRExseg-0XI", "sK4u5T8hW78"], "start_seconds": ["210", "30"], "properties": ["woman, man, water", "a, car, pass"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is 
speaking while water is running and birds are chirping ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking as a car passes by?", "label": 1}, {"captions": ["a woman and man speak while food is frying", "water pouring and bubbling"], "sample_ids": ["zk-xJGQU8-4", "uyRfq-jKPpo"], "start_seconds": ["130", "50"], "properties": ["food, man, woman", "water, bubbles, pouring"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "a person snores loudly multiple times at a close distance"], "sample_ids": ["uYT5gxnyMWM", "sSMl2vc3ek"], "start_seconds": ["50", "20"], "properties": ["person, spray, yell", "loud, multiple, distance"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a person snores 
hilariously while someone laughs", "wind loudly blowing while people speak in the background followed by a horn blowing"], "sample_ids": ["sSMl2vc3ek", "xjhAnI2q6hM"], "start_seconds": ["20", "6"], "properties": ["a person, laughs, snores", "wind, blow, loudly"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a person snoring loudly", "a truck is revving its engine and a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["zgUgkpk78xU", "yDoT73BWsdA"], "start_seconds": ["70", "10"], "properties": ["clinking, humming, horn", "engine, revs, vehicle"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sZPuqDgX2V0", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["engine, accelerate, intercom", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a woman speaks and laughs 
and an animal grunts and snorts", "a stream of water runs briefly"], "sample_ids": ["uEU-Hg5MTN8", "x-PeY8Yb8M4"], "start_seconds": ["27", "300"], "properties": ["animal, grunts, snorts", "stream, water, run"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a person is snoring while sleeping", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vJrjSeP17yE", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["a person is sleeping, snoring, person", "clickety-clack, train, whistle"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a person snoring loudly", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vdoxuJn9lTc", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["burp, loud, girl", "stream, water, flow"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "footage is blurry and out of focus"], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a train horn blows as it passes by"], "sample_ids": ["zTLVJCo4WEE", "zVacuqSb4LI"], "start_seconds": ["30", "30"], "properties": ["a, crickets, sing", "horn, blows, train"], "captions_pred_video": ["- a boy with a rifle aiming at a 
target", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["a few ducks quack and scamper and a man speaks", "a propeller rotates loudly and intensely"], "sample_ids": ["w2bYrCVLT60", "ugHJF0hfYkg"], "start_seconds": ["120", "10"], "properties": ["ducks, speak, quack", "loud, intense, propeller"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "paper folding and crinkling"], "sample_ids": ["smDKStoHBJo", "zPpG3RD8lSs"], "start_seconds": ["0", "20"], "properties": ["a, infant, speaking", "paper, fold, crinkle"], "captions_pred_video": ["a man holding a crying baby in his arms", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how 
to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "a infant makes noise and is excited"], "sample_ids": ["w-4gHptFNuU", "wIJK3-5y0kA"], "start_seconds": ["21", "30"], "properties": ["engine revs, accelerates, bump", "noise, excited, infant"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sSMl2vc3ek", "zj2R0XoFr5k"], "start_seconds": ["20", "50"], "properties": ["a person, laughs, snores", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person snoring loudly", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["bees buzz and wind blows", "a vehicle engine accelerating then 
running on idle"], "sample_ids": ["tMJne1a4AFI", "vYkA3cfXp5Q"], "start_seconds": ["0", "30"], "properties": ["bees buzz, wind blows, bees", "engine, accelerate, idle"], "captions_pred_video": ["a swarm of bees on the ground", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a swarm of bees buzzing around", "an engine is idling"], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a large crowd cheers and applauds", "wind blows as people chatter quietly"], "sample_ids": ["rqfQRErjfk8", "xBxDz0CFVn0"], "start_seconds": ["170", "30"], "properties": ["crowd, cheers, applauds", "wind, chatter, people"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "footage is blurry and out of focus"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "someone is typing on a computer keyboard"], "sample_ids": ["t97k0cejSQE", "v0x1odnXtP0"], "start_seconds": ["250", "210"], "properties": ["bird, chirp, insect", "keyboard, type, computer"], "captions_pred_video": ["a bee on a purple thistle flower", "how to make money on youtube in spanish"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["water flows as men speak and yell", "an infant crying as a woman laughs"], "sample_ids": ["vJ7JPEFhyLA", "xhmRY9yhC7c"], "start_seconds": ["16", "20"], "properties": ["water, flow, men", "a, laugh, infant"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, 
{"captions": ["frogs croak and vocalize", "soft movement is accompanied by clocks ticking in the background"], "sample_ids": ["yswmmRZFItk", "vlJS7LN2XyM"], "start_seconds": ["0", "30"], "properties": ["croak, vocalize, frog", "background, clocks, ticking"], "captions_pred_video": ["a close up of a frog in the water", "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["a frog is croaking", "a ticktock of a clock"], "question": "which entity is silent", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["xvDdE3zNf8Y", "y8WEcpOlT3I"], "start_seconds": ["120", "40"], "properties": ["a, female, speaks", "harsh, wind, blows"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["water splashes and a motorboat passes as people yell", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["w5W5Kqtc8E", "wz7N8YRy74I"], "start_seconds": ["100", "30"], "properties": ["water, splashes, motorboat", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zk-xJGQU8-4", "tiDFTC-5vU"], "start_seconds": ["130", "30"], "properties": ["food, man, woman", "male, duck, 
laugh"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", null], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a man is speaking and ducks are quacking"], "question": "which entity has more people", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "some clanking with distant murmuring"], "sample_ids": ["vZAw4apG0Es", "uMTTDZ2mb4"], "start_seconds": ["30", "30"], "properties": ["people, clock, converse", "clanking, murmuring, distant"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "people are talking and a car is driving by with wind noise in the background "], "question": "which entity is more distant", "label": 1}, {"captions": ["birds fly and flutter around", "water runs from a faucet while some men speak and the water runs in the sink"], "sample_ids": ["wGKgwOP3h30", "vzceMbklWc"], "start_seconds": ["30", "180"], "properties": ["fly, flutter, around", "water, faucet, sink"], "captions_pred_video": ["of the pigeons in the coop", null], "captions_pred_audio": ["pigeons coo and flap their wings", "water is running and a man is speaking"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a person screams glaringly", "a frog croaks as other frogs croak in the background"], "sample_ids": ["xC8kbrKJmco", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["glaringly, screams, person", "background, frog, croak"], "captions_pred_video": [null, "a close up of a frog in the water"], "captions_pred_audio": ["a goat is bleating ", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "an engine runs loudly"], "sample_ids": ["wRBHTgrbiwg", "vqZuVbG6-HI"], "start_seconds": ["50", "130"], 
"properties": ["birds, chirp, cooing", "loud, engine, run"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage is blurry because it's raining outside"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a lawn mower is running and men are speaking "], "question": "which is quieter", "label": 1}, {"captions": ["an engine starts and increases in power", "an infant crying as a woman laughs"], "sample_ids": ["zjTG0gaGCUI", "xhmRY9yhC7c"], "start_seconds": ["80", "20"], "properties": ["power, increase, engine", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["an infant crying as a woman laughs", "people speak softly as food sizzles"], "sample_ids": ["xhmRY9yhC7c", "yhQ2Lg-7qDY"], "start_seconds": ["20", "130"], "properties": ["a, laugh, infant", "food, sizzle, speak"], "captions_pred_video": ["of a baby crying in a baby bouncer", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a baby cries and a woman speaks", "a faucet is running and a man is speaking"], "question": "which entity is about a woman laughing?", "label": 0}, {"captions": ["a door opens and birds chirp", "water splashes as an animal walks through"], "sample_ids": ["yeFvk9x0wWI", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["door, open, birds", "animal, water, splashes"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "paper folding and 
crinkling"], "sample_ids": ["uEU-Hg5MTN8", "zPpG3RD8lSs"], "start_seconds": ["27", "20"], "properties": ["animal, grunts, snorts", "paper, fold, crinkle"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "the wind blows and a mouse clicks "], "question": "which entity is more likely to be a toy", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wqN6IIHw3po", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["rain, surface, fall", "loud, jet engine, roar"], "captions_pred_video": ["in your own words what is happening in this screenshot? 
blood splattered all over the place", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking and water is splashing", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vlS6YMeWAPo", "vb1fPSDI4c"], "start_seconds": ["40", "30"], "properties": ["noise, bleat, call", "multiple, people, yell"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", null], "captions_pred_audio": ["a goat bleats and birds chirp", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a child speaks in closed space"], "sample_ids": ["wnpJndXuxLc", "yW6FWLSLkx4"], "start_seconds": ["50", "40"], "properties": ["blows, vehicle, train", "child, space, speak"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "a horse runs while two women talk"], "sample_ids": ["zkKdxzNC97Y", "sdvI1mHAsc"], "start_seconds": ["27", "20"], "properties": ["hard, surface, door", "two women, horse, run"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "horses clip-clop and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "a vehicle engine 
runs and wind blows before women yell"], "sample_ids": ["vKrYfzleLB8", "w5W5Kqtc8E"], "start_seconds": ["110", "100"], "properties": ["a, ring, gunshots", "wind, blow, vehicle"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", null], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "a horn rings out as a machine runs by"], "sample_ids": ["xjhAnI2q6hM", "slZLHwNbbt4"], "start_seconds": ["6", "300"], "properties": ["wind, blow, loudly", "a, horn, run"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a man speaks as a car is passing by"], "sample_ids": ["vbZ-0lGPneg", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a woman, a television program, a bird", "a, car, pass"], "captions_pred_video": ["of a man holding a baby duck in his hands", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a dog is 
whimpering", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a bird in it?", "label": 0}, {"captions": ["electronic beeps occur in a short series", "a woman speaks and other women and a man talk with her"], "sample_ids": ["y682ml90jGw", "vbpKkWvfOu4"], "start_seconds": ["11", "560"], "properties": ["beeps, series, electronic", "a, woman, man"], "captions_pred_video": [null, "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a beeping sound is being made ", "a woman is speaking and a man is speaking"], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vYkA3cfXp5Q", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["speed, idle, accelerate", "a woman, laughs, animal"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["an engine is idling", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a child speaks in closed space", "birds chirp and objects are moved around"], "sample_ids": ["yW6FWLSLkx4", "yPUYU6t3rwo"], "start_seconds": ["40", "370"], "properties": ["child, space, speak", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "insects buzz and a man speaks"], "question": "which entity is more 
active", "label": 1}, {"captions": ["an aircraft engine runs", "a woman speaks as she rubs two objects together"], "sample_ids": ["yLCORCnd35Q", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["engine, aircraft, runs", "two objects, woman, speak"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which object is moving", "label": 0}, {"captions": ["music plays followed by gunshots and then an explosion", "a toilet flushes and a female speaks"], "sample_ids": ["xKB8O8LTs6s", "yaln9y8I7ms"], "start_seconds": ["70", "230"], "properties": ["music, gunshots, explosion", "female, flushes, toilet"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage is blurry and out of focus"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a toilet flushes and a man speaks"], "question": "which entity is more likely to be in a 
bathroom", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "an insect buzzes around continuously"], "sample_ids": ["u--KhUW8l1Y", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["engine, sound, horn", "buzzes, continuously, insect"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a fly is buzzing around a microphone "], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zofjfKhqLk8", "wDVMhEdTiVw"], "start_seconds": ["10", "30"], "properties": ["noise, stop, motor", "gun, shoot, water"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a helicopter engine runs continuously", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["ugHJF0hfYkg", "zFjIWfSD-4"], "start_seconds": ["10", "410"], "properties": ["engine, running, continuously", "People, motor, brakes"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running continuously", "label": 0}, {"captions": ["a door slams shut and an object moves on a hard surface", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zkKdxzNC97Y", "wz7N8YRy74I"], "start_seconds": ["27", "30"], "properties": ["hard, 
surface, door", "rooster, crow, background, men"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a child speaks", "a stream of water runs briefly"], "sample_ids": ["yW6FWLSLkx4", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["a, child, speaks", "stream, water, run"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["water running down a sink while a man is talking", "wind blows as people chatter quietly"], "sample_ids": ["vSeGhaZt-aI", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["water, sink, talk", "wind, chatter, people"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wtDqrBygTcU", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["man, engine, run", "applause, audience, yells"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and a motor is running", 
"a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["sawing of wood and rustling with leaves blowing in the distance", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["uiItxDsDMFI", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["sound, distance, leaves", "a woman, a television program, a bird"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a saw is being used with background noise ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program playing far away?", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "a vehicle engine runs while a siren and horn sound"], "sample_ids": ["xvDdE3zNf8Y", "u--KhUW8l1Y"], "start_seconds": ["120", "0"], "properties": ["a, female, speaks", "engine, sound, horn"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "a firefighter spraying water from a fire hydrant at night"], "captions_pred_audio": ["a woman speaks and crumples paper", "a fire truck siren blares and a horn blows "], "question": "which entity is louder", "label": 1}, {"captions": ["water runs into a sink while men speak", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vzceMbklWc", "wDVMhEdTiVw"], "start_seconds": ["180", "30"], "properties": ["water, sink, run", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["water is running and a man is speaking", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is about water running into a sink?", "label": 0}, {"captions": ["a motor idles, accelerates, then slows down.", "several insects fly while two men talk"], "sample_ids": ["vYkA3cfXp5Q", "s-T9OVOiMLo"], 
"start_seconds": ["30", "330"], "properties": ["speed, idle, accelerate", "several, fly, men"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["an engine is idling", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a living thing", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a train horn blows as it passes by"], "sample_ids": ["xSKJGCItUWE", "zVacuqSb4LI"], "start_seconds": ["10", "30"], "properties": ["engine, run, boy", "horn, blows, train"], "captions_pred_video": ["footage of the helicopter flying in the room", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train?", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "a woman speaks happily and an animal chirps"], "sample_ids": ["sZPuqDgX2V0", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["engine, accelerate, intercom", "a woman, chirps, animal"], "captions_pred_video": [null, 
null], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a stream of water runs briefly"], "sample_ids": ["rqfQRErjfk8", "x-PeY8Yb8M4"], "start_seconds": ["170", "300"], "properties": ["crowd, cheers, applauds", "stream, water, run"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["tPJvjq9QePY", "tDVADusiIoc"], "start_seconds": ["40", "60"], "properties": ["animal, bleat, moo", "water, radio, man"], "captions_pred_video": ["a dog and a sheep in a barn", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a baby cries and a man speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["v7jJS8aAyA", "wz7N8YRy74I"], "start_seconds": ["10", "30"], "properties": ["wind, blows, loudly", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["a cat meows and children speak", "an audience gives applause as a man yells and a group sings"], "sample_ids": 
["x5cuQjOdM3E", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["cat, speak, children", "applause, audience, yells"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "a vehicle engine accelerating then running on idle"], "sample_ids": ["y8dSeubCNI", "vYkA3cfXp5Q"], "start_seconds": ["4", "30"], "properties": ["engine revving, people speaking, motorcycle", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["an engine revving and people talking in the background", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "paper is crumpling consistently"], "sample_ids": ["zgUgkpk78xU", "v5cSxLaHADY"], "start_seconds": ["70", "0"], "properties": ["clinking, humming, horn", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["multiple insects buzz over rustling wind", "winds blows roughly as a vehicle races past"], "sample_ids": 
["tMJne1a4AFI", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["wind, buzz, rustling", "wind, blows, vehicle"], "captions_pred_video": ["a swarm of bees on the ground", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a swarm of bees buzzing around", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "water splashes and a door squeaks"], "sample_ids": ["vJvryTwuAV8", "sdXV-ylviw"], "start_seconds": ["16", "190"], "properties": ["audience, cheer, man", "sound, splash, door"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", null], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a dog barks and taps with background noise "], "question": "which entity has a door that squeaks?", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wy1eKjR7KC0", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["people, talk, distance", "engine, idle, woman"], "captions_pred_video": ["two police officers riding motorcycles down the street", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["an airplane engine spools and people speak", "people applaud and hoot and chat quietly"], "sample_ids": ["wTjoRj1se3U", "wwyfGO2J4"], "start_seconds": ["390", "90"], "properties": ["airplane, engine, spool", "people, applaud, hoot"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", null], "captions_pred_audio": ["a jet engine is running 
and people are talking", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["continuous sneezing together with speech", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["x4dZyf9Gbj0", "uEU-Hg5MTN8"], "start_seconds": ["130", "27"], "properties": ["continuous, sneeze, speech", "animal, grunts, snorts"], "captions_pred_video": ["footage is blurry and out of focus", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman sneezes and speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sQwlkXjQabo", "zj2R0XoFr5k"], "start_seconds": ["10", "50"], "properties": ["water, spray, surface", "airplane, boy, fly"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["spraying followed by silence", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["bees buzz as wind blows", "water is sprayed across a hard surface"], "sample_ids": ["tMJne1a4AFI", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["bees, buzz, wind", "water, spray, surface"], "captions_pred_video": ["a swarm of bees on the ground", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a swarm of bees buzzing around", "spraying followed by silence"], "question": "which entity is not a living thing", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wyllXV6PjKo", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], 
"properties": ["a kid, talk, cry", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman speaks and a baby cries", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sU53zg9Jp7s", "uYT5gxnyMWM"], "start_seconds": ["380", "50"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "a, scream, girl"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a doorbell?", "label": 0}, {"captions": ["a jet engine screams, then increases its power", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vBslzh7saPw", "xBxDz0CFVn0"], "start_seconds": ["90", "30"], "properties": ["power, scream, increase", "stream, water, flow"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage is blurry and out of focus"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["wind blows as people chatter quietly", "several insects fly while two men talk"], "sample_ids": ["xBxDz0CFVn0", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["wind, chatter, people", "several, fly, men"], "captions_pred_video": ["footage is blurry and out of focus", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking 
with wind noise in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["some people speak", "a car accelerates and wind blows"], "sample_ids": ["vbZ-0lGPneg", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "accelerates, wind, blows"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "pigeons vocalize and birds chirp"], "sample_ids": ["u6jIvCtKarQ", "uiS58TNyUiw"], "start_seconds": ["70", "430"], "properties": ["a, man, speaks", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a person using a blender on a stove top", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["uqFtmnhuqA8", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["a, b, c", "engine, laugh, loud"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "footage of a man driving a car in the dark"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a child speaks in closed space"], "sample_ids": ["ugHJF0hfYkg", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["loud, propeller, move", "child, space, 
speak"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "people speak as gunfire rings out"], "sample_ids": ["vimzuGQvdcU", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["a, man, yells", "gunfire, ring, speak"], "captions_pred_video": ["a group of people are rafting down a river", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["a motor idles, accelerates, then slows down.", "a woman speaks as she rubs two objects together"], "sample_ids": ["vYkA3cfXp5Q", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["speed, idle, accelerate", "two objects, woman, speak"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["an engine is idling", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of speed", "label": 0}, {"captions": ["a motorcycle engine works nearby", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["tOSWIURC-4", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["engine, work, nearby", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a lawn mower is running ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a scene from a movie", "label": 1}, {"captions": ["a person is burping while a girl speaks", "someone whistles a tune"], "sample_ids": ["vdoxuJn9lTc", "sIXTftIuUgw"], "start_seconds": ["40", "90"], "properties": ["person, burp, girl", "someone, tune, whistle"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", null], "captions_pred_audio": ["a child speaks followed by a burp", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "a race car approaches quickly and slows down squealing tires"], "sample_ids": ["xvDdE3zNf8Y", "sEprKHm8Sj8"], "start_seconds": ["120", "90"], "properties": ["A, crumple, paper", "car, tires, slows"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["a woman speaks and crumples 
paper", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["uEU-Hg5MTN8", "zFjIWfSD-4"], "start_seconds": ["27", "410"], "properties": ["a woman, laughs, animal", "People, motor, brakes"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["s4Uz1Ffgo04", "w5W5Kqtc8E"], "start_seconds": ["100", "100"], "properties": ["water, rushes, vehicle", "wind, blow, vehicle"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle zooming past water?", "label": 0}, {"captions": ["a stream runs then someone speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["wbHTKEJZyhc", "uZesmtKZGSw"], "start_seconds": ["20", "250"], "properties": ["stream, run, someone", "men, talk, cars"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background 
video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "wind blows as people chatter quietly"], "sample_ids": ["s4Uz1Ffgo04", "xBxDz0CFVn0"], "start_seconds": ["100", "30"], "properties": ["water, rushes, motorcycle", "wind, chatter, people"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking with wind noise in the background "], "question": "which entity is 
more quiet", "label": 1}, {"captions": ["a helicopter engine idles continuously", "a vehicle engine accelerating then running on idle"], "sample_ids": ["ugHJF0hfYkg", "vYkA3cfXp5Q"], "start_seconds": ["10", "30"], "properties": ["engine, idle, continuously", "engine, accelerate, idle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a helicopter is flying overhead ", "an engine is idling"], "question": "which engine is running on idle", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a drill runs and two people laugh"], "sample_ids": ["vhJWZheqaE", "tEE3MpBt1sg"], "start_seconds": ["0", "50"], "properties": ["water drains unevenly, toilet flushes, water drains", "two people, laugh, drill"], "captions_pred_video": [null, "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a toilet is flushed", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["goats bleat and metal clings", "wind blows as people chatter quietly"], "sample_ids": ["tH17JPjDPnc", "xBxDz0CFVn0"], "start_seconds": ["260", "30"], "properties": ["bleat, metal, clings", "wind, chatter, people"], "captions_pred_video": ["feed of the goats eating hay in the barn", "footage is blurry and out of focus"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a goat screams and people speak in the background", "an infant crying as a woman laughs"], "sample_ids": ["xC8kbrKJmco", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["background, goat, scream", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a goat is bleating ", "a baby cries and a woman 
speaks"], "question": "which entity is crying", "label": 1}, {"captions": ["some people speak", "wind blows as people chatter quietly"], "sample_ids": ["vbZ-0lGPneg", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "wind, chatter, people"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks while water drains", "some tunes played by whistling"], "sample_ids": ["vSeGhaZt-aI", "u6BnG6YZqJ4"], "start_seconds": ["50", "0"], "properties": ["water, drain, man", "tune, play, whistling"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a person whistling a song"], "question": "which entity is playing tunes", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wqZ135Ssz0", "vb1fPSDI4c"], "start_seconds": ["60", "30"], "properties": ["two men, woman, birds", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["people speak and tapping occurs", "waves crash against a shoreline and people speak"], "sample_ids": ["tFCUUGdREgA", "yFB25fqfU8I"], "start_seconds": ["70", "300"], "properties": ["people, tap, speak", "wave, crash, shoreline"], "captions_pred_video": ["a person riding a white horse 
in an indoor arena", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be in a desert?", "label": 0}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["uOpoD0gGXcs", "tDlysoZiA1I"], "start_seconds": ["120", "0"], "properties": ["chirps, woman, bird", "animal, grunts, chirps"], "captions_pred_video": ["a herd of cows grazing in the field", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["birds are chirping and a man is speaking", "birds are chirping and a rooster is crowing "], "question": "which entity is a response to a woman chirping for the birds?", "label": 0}, {"captions": ["people clap and speak in the distance", "a small voice speaks, music plays followed by a double whoosh, and then a bell dings"], "sample_ids": ["wwyfGO2J4", "tQWGZLItBXk"], "start_seconds": ["90", "170"], "properties": ["clap, distance, speak", "voice, music, whoosh"], "captions_pred_video": [null, "worms revolution screenshots"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity has music", "label": 1}, {"captions": ["frogs croak and vocalize", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["yswmmRZFItk", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["croak, vocalize, frog", "male, duck, laugh"], "captions_pred_video": ["a close up of a frog in the water", null], "captions_pred_audio": ["a frog is croaking", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["a baby cries and a woman speaks", "a infant makes noise 
and is excited"], "sample_ids": ["tMbMDvT50j8", "wIJK3-5y0kA"], "start_seconds": ["12", "30"], "properties": ["a, cry, woman", "noise, excited, infant"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a baby cries and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity is a baby", "label": 0}, {"captions": ["a airplane flies overhead as a woman speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zj2R0XoFr5k", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["airplane, fly, woman", "a woman, something, fried"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a vehicle engine accelerating then running on idle"], "sample_ids": ["uZesmtKZGSw", "vYkA3cfXp5Q"], "start_seconds": ["250", "30"], "properties": ["men, talk, cars", "engine, accelerate, idle"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage of a car driving down the street on a sunny day"], 
"captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "water splashes as an animal walks through"], "sample_ids": ["vK93VuO0yNc", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["male voice, bus, rumble", "animal, water, splashes"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["roadway noise occurs and a truck accelerates", "a toilet flushes and water drains"], "sample_ids": ["tgbONvsP47Y", "sfAvvZwdLCY"], "start_seconds": ["0", "20"], "properties": ["noise, truck, accelerate", "water drains, flushes, water"], "captions_pred_video": ["footage of a fire truck entering a garage", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a car is driving on the road ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a goat bleats as a person speaks", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tPJvjq9QePY", "xfaoyyzw2WU"], "start_seconds": ["40", "180"], "properties": ["bleats, person, speak", "loud, jet engine, roar"], "captions_pred_video": ["a dog and a sheep in a barn", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a baby cries and a man speaks", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "a man speaks over intermittent keyboard taps"], "sample_ids": ["vSeGhaZt-aI", "tw76HGONaKg"], "start_seconds": 
["50", "570"], "properties": ["water, bubbles, run", "audio, man, keyboard"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "an infant crying as a woman laughs"], "sample_ids": ["xZepNM9qcRA", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["background, motor, run", "a, laugh, infant"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "a car speeding up in the distance"], "sample_ids": ["vK93VuO0yNc", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["male voice, bus, rumble", "distance, car, 
speed"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", null], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["food is frying while a woman speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["yhQ2Lg-7qDY", "wwyfGO2J4"], "start_seconds": ["130", "90"], "properties": ["food, woman, speak", "people, applaud, hoot"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a man speaks followed by another man speaking outside"], "sample_ids": ["u2f5NpsoHBg", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["person, laugh, clap", "two men, speak, follow"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two people speaking?", "label": 1}, {"captions": ["a man speaks uses a drill", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["x5eIC7S0fbg", "vfYTJq7nU"], "start_seconds": ["60", "130"], "properties": ["A man is speaking, uses a drill, and is a tool", "rustling, ducks, quack"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", null], "captions_pred_audio": ["a man is speaking and using a power tool ", "a duck quacks and a woman 
speaks"], "question": "which entity is a tool", "label": 0}, {"captions": ["a dog whimpers and a woman briefly talks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["y1saVTXsKwc", "wz7N8YRy74I"], "start_seconds": ["80", "30"], "properties": ["a, dog, talk", "rooster, crow, background, men"], "captions_pred_video": ["a dog playing with a pink ball", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a dog barks and a man speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people talking", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a clock ticktocks"], "sample_ids": ["ziUT9IFTkjg", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["background, birds, rustling", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["several ducks are quacking and squawking", "water pouring and bubbling"], "sample_ids": ["wfHeoPDLMaM", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["quacking, squawking, ducks", "water, bubbles, pouring"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the 
middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["ducks are quacking", "water is running from a faucet"], "question": "which entity is silent", "label": 1}, {"captions": ["wind blows as people chatter quietly", "an airplane engine spools and people speak"], "sample_ids": ["xBxDz0CFVn0", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["wind, chatter, people", "airplane, engine, spool"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a jet engine is running and people are talking"], "question": "which entity is more quiet", "label": 0}, {"captions": ["someone is snoring while sleeping", "wind blows as people chatter quietly"], "sample_ids": ["ujMt0-D-x2k", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["snore, sleep, someone", "wind, chatter, people"], "captions_pred_video": ["of the dog playing with a toy on the floor", 
"footage is blurry and out of focus"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sjlVMgdGSK0", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["car, revving, loudly", "two men, woman, birds"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a machine runs continuously", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wdXV3Pv0jiY", "vb1fPSDI4c"], "start_seconds": ["11", "30"], "properties": ["machine, running, continuously", "multiple, people, yell"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a crowd of people are talking and laughing"], "question": "which entity is not silent", "label": 1}, {"captions": ["a horse runs while two women talk", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sdvI1mHAsc", "vfYTJq7nU"], "start_seconds": ["20", "130"], "properties": ["two women, horse, run", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 0}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["su6FAOcOA8c", 
"su6FAOcOA8c"], "start_seconds": ["4", "4"], "properties": ["engine, run, woman", "engine, idle, woman"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a woman is speaking and a subway train is moving "], "question": "which entity has a running engine", "label": 0}, {"captions": ["electronic beeps occur in a short series", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["y682ml90jGw", "w5W5Kqtc8E"], "start_seconds": ["11", "100"], "properties": ["beeps, series, electronic", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "water pouring and bubbling"], "sample_ids": ["w5W5Kqtc8E", "uyRfq-jKPpo"], "start_seconds": ["100", "50"], "properties": ["wind, blow, vehicle", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a motorboat is moving and people are shouting 
and cheering ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sfAvvZwdLCY", "vYkA3cfXp5Q"], "start_seconds": ["20", "30"], "properties": ["flushes, drains, water", "engine, accelerate, idle"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a toilet is flushed", "an engine is idling"], "question": "which entity is a machine", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "a motor runs steadily as a man speaks, then the motor revs twice"], "sample_ids": ["yDoT73BWsdA", "ylpYOorfH4o"], "start_seconds": ["10", "410"], "properties": ["engine, revs, vehicle", "motor, run, steady"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and an engine is revving"], "question": "which motor is running steadily", "label": 1}, {"captions": ["a person sniffles and then sneezes in the distance", "someone is typing on a computer keyboard"], "sample_ids": ["uRlbY6aoBU", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["a, distance, 
sneeze", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is sneezing ", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["wind blowing followed by a zoom", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vr8ZXjEBhMQ", "tdWhHV3X25Q"], "start_seconds": ["150", "60"], "properties": ["wind, blow, zoom", "applause, audience, yells"], "captions_pred_video": ["is taken from a motorcycle's point of view", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a man is speaking and a crowd is clapping"], "question": "which entity is a person", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "a man speaks as horns blow"], "sample_ids": ["xyL9F5VrjkE", "tHyNqRyK34A"], "start_seconds": ["20", "24"], "properties": ["engine, run, wind", "a, man, speaks"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "being taken from inside a vehicle on the street at night"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking and a car is honking with background noise "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "paper is crumpling consistently"], "sample_ids": ["u7C-AEBQM", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["ticks, rhythmic, quiet", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a ticktock of a clock", "paper is crumpled and crinkled"], "question": "which entity is crumpling consistently", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "a propeller rotates loudly and intensely"], "sample_ids": 
["uPDn2BFTHk", "ugHJF0hfYkg"], "start_seconds": ["140", "10"], "properties": ["woman, laughs, speaks", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wtDqrBygTcU", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["man, engine, run", "water, radio, man"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and a motor is running", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["someone snores nearby", "a woman speaks happily and an animal chirps"], "sample_ids": ["spJCm8tD9Zo", "uWAAAL4CIoc"], "start_seconds": ["90", "0"], "properties": ["someone snores, nearby, someone", "a woman, chirps, animal"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a dog is barking "], "question": "which entity is more active", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "some men converse over an engine running"], "sample_ids": ["yVumC9TGknc", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["humming, clock, birds", "men, converse, engine"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", null], "captions_pred_audio": ["a series of beeps and chirps", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video of a person speaking?", 
"label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xSKJGCItUWE", "tDVADusiIoc"], "start_seconds": ["10", "60"], "properties": ["engine, run, boy", "water, radio, man"], "captions_pred_video": ["footage of the helicopter flying in the room", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a boy speaking?", "label": 0}, {"captions": ["some men converse over an engine running", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sCiy7QS1U", "uYT5gxnyMWM"], "start_seconds": ["300", "50"], "properties": ["men, converse, engine", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["yZp6xizR0yU", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["animal, bleat, cry", "animal, grunts, snorts"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a woman is speaking and a baby is crying"], "question": "which animal is grunting and snorting", "label": 1}, {"captions": ["an airplane engine spools and people speak", "some tunes played by whistling"], "sample_ids": ["wTjoRj1se3U", "u6BnG6YZqJ4"], "start_seconds": ["390", "0"], "properties": ["airplane, engine, spool", "tune, play, whistling"], 
"captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a jet engine is running and people are talking", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "vehicles pass by on a roadway"], "sample_ids": ["viuTg1M-dqg", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["two men, speak, follow", "pass, vehicle, roadway"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a car is driving on the road "], "question": "which entity is more likely to be in a city", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["sWZzXuWYY", "tDlysoZiA1I"], "start_seconds": ["420", "0"], "properties": ["male, speech, banging", "animal, grunts, chirps"], "captions_pred_video": [null, "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["ugHJF0hfYkg", "su6FAOcOA8c"], "start_seconds": ["10", "4"], "properties": ["loud, propeller, move", "engine, idle, woman"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a subway train is moving "], "question": "which is quieter", "label": 1}, {"captions": ["a man 
yells and speaks as water splashes", "someone is typing on a computer keyboard"], "sample_ids": ["vimzuGQvdcU", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["a, man, yells", "keyboard, type, computer"], "captions_pred_video": ["a group of people are rafting down a river", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a person is typing on a keyboard"], "question": "which is a more active activity", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wfHeoPDLMaM", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["quacking, squawking, ducks", "airplane, boy, fly"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["ducks are quacking", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "dishes cling together 
then a man begins to speak"], "sample_ids": ["slZLHwNbbt4", "sQGXqGcwOTc"], "start_seconds": ["300", "3"], "properties": ["a, horn, run", "cling, speak, dishes"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "mechanisms are operating and water is splashing "], "question": "what is the man doing in the second image?", "label": 0}, {"captions": ["a woman talks while something is fried and objects are tapped", "a infant makes noise and is excited"], "sample_ids": ["yajyRTUQk3U", "wIJK3-5y0kA"], "start_seconds": ["400", "30"], "properties": ["a woman, something, fried", "noise, excited, infant"], "captions_pred_video": ["- a woman cooking in the kitchen", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "a woman speaks as she rubs two objects together"], "sample_ids": ["y4tPJXBKDig", "vzxHnu-SFEw"], "start_seconds": ["20", "80"], "properties": ["a, noise, talk", "two objects, woman, speak"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["water flows followed by women screaming", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["w5W5Kqtc8E", "wDVMhEdTiVw"], "start_seconds": ["100", "30"], "properties": ["water, flow, women", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is about water flowing?", "label": 0}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "an infant crying frantically"], "sample_ids": ["uOpoD0gGXcs", "zwOBqeFTgiU"], "start_seconds": ["120", "30"], "properties": ["chirps, woman, bird", "cry, infant, frantically"], "captions_pred_video": ["a herd of cows grazing in the field", "of the baby crying in the car seat"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "someone whistles briefly"], "sample_ids": ["tZGN5a7ybxo", "uFoga8sHpiw"], "start_seconds": ["60", "90"], "properties": ["ring, train, horn", "sound, duration, pitch"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "footage of a bird in a cage"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a person whistles a song"], 
"question": "which entity has a higher pitch", "label": 1}, {"captions": ["dogs barking and whimpering", "a motor idles, accelerates, then slows down."], "sample_ids": ["tIY7qOV3rEM", "vYkA3cfXp5Q"], "start_seconds": ["0", "30"], "properties": ["barking, whimpering, dog", "speed, idle, accelerate"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "an engine is idling"], "question": "which entity is a machine", "label": 1}, {"captions": ["a man is filing a hard object", "a man speaks followed by another man speaking outside"], "sample_ids": ["vveS8HT7Uog", "viuTg1M-dqg"], "start_seconds": ["100", "30"], "properties": ["a man, hard, object", "two men, speak, follow"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about speaking", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "small dogs yip and bark sharply"], "sample_ids": ["sWZzXuWYY", "v-wcQf4BDY0"], "start_seconds": ["420", "120"], "properties": ["male, speech, banging", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "male speech with light ticking"], "sample_ids": ["spYNpeN7rPY", "xO-Q2BlIIPU"], "start_seconds": ["1", "30"], "properties": ["a clock, ticktock, man", "male, speech, ticking"], "captions_pred_video": ["in 10 
words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "a clock with a green glowing display showing the time 09 07 2016 12 31 2016"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a clock ticking?", "label": 0}, {"captions": ["a weapon fires multiple times", "a clock ticktocks"], "sample_ids": ["sMC07Ucy7kg", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["weapon, fire, multiple", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage is from a car's point of view", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "water flows and trickles"], "sample_ids": ["wqADXCzngMw", "tB7hWb9gTuQ"], "start_seconds": ["340", "30"], 
"properties": ["engine, idle, man", "water, flow, trickle"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a mechanical buzzing getting louder"], "sample_ids": ["su6FAOcOA8c", "sEprKHm8Sj8"], "start_seconds": ["4", "90"], "properties": ["engine, run, woman", "noise, loud, buzzing"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a race car accelerates and revs its engine "], "question": "which entity is a noise", "label": 1}, {"captions": ["people speak then an engine runs", "winds blows roughly as a vehicle races past"], "sample_ids": ["uMTTDZ2mb4", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["engine, run, people", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a jet engine roars and wind blows "], "question": "which entity is about a vehicle racing past?", "label": 1}, {"captions": ["a machine beeps continuously", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["y682ml90jGw", "su6FAOcOA8c"], "start_seconds": ["11", "4"], "properties": ["beeps, machine, continuously", "engine, idle, woman"], "captions_pred_video": [null, "shows a group 
of people riding on a crowded subway train"], "captions_pred_audio": ["a beeping sound is being made ", "a woman is speaking and a subway train is moving "], "question": "which entity is a machine?", "label": 0}, {"captions": ["a person is whistling", "a toilet flushes and a female speaks"], "sample_ids": ["sIXTftIuUgw", "yaln9y8I7ms"], "start_seconds": ["90", "230"], "properties": ["person, whistling, person", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a person whistling a song", "a toilet flushes and a man speaks"], "question": "which entity is a person", "label": 0}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a woman speaks happily and an animal chirps"], "sample_ids": ["s4Uz1Ffgo04", "uWAAAL4CIoc"], "start_seconds": ["100", "0"], "properties": ["roars, background, people speaking", "a woman, chirps, animal"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a woman is speaking and a dog is barking "], "question": "which entity is quieter", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "winds blows roughly as a vehicle races past"], "sample_ids": ["zl9Dqx-j7q4", "xjvTpk2Zpr8"], "start_seconds": ["6", "70"], "properties": ["motors rev, laugh, loudly", "wind, blows, vehicle"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a jet engine roars ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman and man are speaking", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["vbpKkWvfOu4", "xV7Mg1QucSc"], "start_seconds": ["560", "14"], "properties": ["two people, speaking, woman, man", "alarm, ticktocks, 
laughs"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "an alarm clock ticks and a woman laughs"], "question": "which entity is about a clock ticktocking and a man laughing?", "label": 1}, {"captions": ["electronic beeps occur in a short series", "an infant crying frantically"], "sample_ids": ["y682ml90jGw", "zwOBqeFTgiU"], "start_seconds": ["11", "30"], "properties": ["beeps, series, electronic", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a beeping sound is being made ", "a baby cries loudly"], "question": "which entity is a human", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "an infant crying as a woman laughs"], "sample_ids": ["wz7N8YRy74I", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["rooster, crow, background, men", "a, laugh, infant"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a person is whistling", "paper is crumpling consistently"], "sample_ids": ["sIXTftIuUgw", "v5cSxLaHADY"], "start_seconds": ["90", "0"], "properties": ["person, whistling, person", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a person whistling a song", "paper is crumpled and crinkled"], "question": "which entity 
is crumpling", "label": 1}, {"captions": ["a motorcycle engine is idling", "pigeons vocalize and birds chirp"], "sample_ids": ["vZAqdHZ81yA", "uiS58TNyUiw"], "start_seconds": ["180", "430"], "properties": ["engine, motorcycle, idling", "vocalize, bird, chirp"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "of the pigeon in the cage"], "captions_pred_audio": ["an engine is idling loudly", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a piece of wood is being placed down and sawed", "a door opens and birds chirp"], "sample_ids": ["uiItxDsDMFI", "yeFvk9x0wWI"], "start_seconds": ["30", "30"], "properties": ["wood, piece, saw", "door, open, birds"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a saw is being used with background noise ", "birds chirp in the background as a car drives by "], "question": "which entity is a door?", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sQwlkXjQabo", "tiDFTC-5vU"], "start_seconds": ["10", "30"], "properties": ["water, spray, surface", "male, duck, laugh"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", null], "captions_pred_audio": ["spraying followed by silence", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "pigeons vocalize and birds chirp"], "sample_ids": ["tDVADusiIoc", "uiS58TNyUiw"], "start_seconds": ["60", "430"], "properties": ["water, radio, man", "vocalize, bird, chirp"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of the pigeon in the cage"], "captions_pred_audio": ["a 
man is speaking while the wind is blowing and water is splashing", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["food is frying while a woman speaks", "a car speeding up in the distance"], "sample_ids": ["yhQ2Lg-7qDY", "u0TrcHhkPQ"], "start_seconds": ["130", "20"], "properties": ["food, woman, speak", "distance, car, speed"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a church bell rings several times", "a man speaks as a car is passing by"], "sample_ids": ["sUVVjE3Ucp8", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["ring, bell, several", "a, car, pass"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a church bell is ringing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["an audience gives applause", "a dog whimpers as someone inhales/exhales briefly"], "sample_ids": ["x6iCUDmRpKQ", "vmrxwuAMb2I"], "start_seconds": ["38", "40"], "properties": ["applause, audience, give", "a dog, inhales, exhales"], "captions_pred_video": ["a black background with the moon and stars in the sky", "of the dog laying on the bed with his head out of the blanket"], "captions_pred_audio": ["a 
group of people are clapping and cheering", "a dog barks and growls"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a woman speaks happily and an animal chirps"], "sample_ids": ["wtDqrBygTcU", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["man, engine, run", "a woman, chirps, animal"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", null], "captions_pred_audio": ["a man is speaking and a motor is running", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "wind blows as people chatter quietly"], "sample_ids": ["ylpYOorfH4o", "xBxDz0CFVn0"], "start_seconds": ["410", "30"], "properties": ["engine, run, loud", "wind, chatter, people"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a helicopter engine runs continuously"], "sample_ids": ["wTjoRj1se3U", "ugHJF0hfYkg"], "start_seconds": ["390", "10"], "properties": ["engine, run, people", "engine, running, 
continuously"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a jet engine is running and people are talking", "a helicopter is flying overhead "], "question": "which entity has a running engine", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "multiple people speak and children yell while water gurgles"], "sample_ids": ["u6jIvCtKarQ", "vb1fPSDI4c"], "start_seconds": ["70", "30"], "properties": ["a, man, speaks", "multiple, people, yell"], "captions_pred_video": ["footage of a person using a blender on a stove top", null], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["w34HjHr6gAY", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["beeps, hit, woman", "animal, grunts, snorts"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a woman is speaking and a baby is crying"], "question": "which entity has a woman talking?", "label": 0}, {"captions": ["multiple adults speaking, and a child shouting in the 
background", "water flows and trickles"], "sample_ids": ["yks4cLgIDMc", "tB7hWb9gTuQ"], "start_seconds": ["170", "30"], "properties": ["background, speaking, child", "water, flow, trickle"], "captions_pred_video": ["footage of two kids wrestling on the floor", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and a child is crying", "water is splashing and gurgling"], "question": "which entity is a moving object", "label": 1}, {"captions": ["someone snores nearby", "paper is crumpling consistently"], "sample_ids": ["spJCm8tD9Zo", "v5cSxLaHADY"], "start_seconds": ["90", "0"], "properties": ["someone snores, nearby, someone", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a person is snoring loudly", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "pigeons vocalize and birds chirp"], "sample_ids": ["wqADXCzngMw", "uiS58TNyUiw"], "start_seconds": ["340", "430"], "properties": ["engine, idle, man", "vocalize, bird, chirp"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "of the pigeon in the cage"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a car accelerates and wind blows"], "sample_ids": ["sQwlkXjQabo", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["water, spray, surface", "accelerates, wind, blows"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", null], "captions_pred_audio": ["spraying followed by silence", "a race car accelerates and revs its 
engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["t69a8aRKhmc", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["a, b, c", "male, duck, laugh"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck?", "label": 1}, {"captions": ["speaking following by laughing and clapping", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["u2f5NpsoHBg", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["person, laugh, clap", "two men, woman, birds"], "captions_pred_video": ["is being projected on a screen at the front of the stage", null], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tiDFTC-5vU", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["male, duck, laugh", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a child speaks in closed space"], "sample_ids": ["v0x1odnXtP0", "yW6FWLSLkx4"], "start_seconds": ["210", "40"], "properties": ["keyboard, type, computer", "child, space, speak"], "captions_pred_video": ["how to make money on youtube in spanish", "of the doll sitting on the bed with her hand 
outstretched towards the viewer"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a train horn sounds as a railroad passing bell rings", "a child speaks in closed space"], "sample_ids": ["zgUgkpk78xU", "yW6FWLSLkx4"], "start_seconds": ["70", "40"], "properties": ["horn, bell, train", "child, space, speak"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["x9JovgqUcs", "wqZ135Ssz0"], "start_seconds": ["500", "60"], "properties": ["a, man, speaks, keyboard", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a keyboard", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "waves crash against a shoreline and people speak"], "sample_ids": ["u7C-AEBQM", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["ticks, rhythmic, quiet", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and laughing while 
water is splashing and gurgling"], "question": "which entity is more quiet", "label": 0}, {"captions": ["some liquid flows while a woman laughs and man talks", "birds chirp and an insect buzzes around"], "sample_ids": ["vddP56-ogds", "t97k0cejSQE"], "start_seconds": ["30", "250"], "properties": ["liquid, laughs, man", "bird, chirp, insect"], "captions_pred_video": [null, "a bee on a purple thistle flower"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a bee buzzes and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "an airplane engine runs"], "sample_ids": ["yajyRTUQk3U", "yVPZ2MNWpms"], "start_seconds": ["400", "0"], "properties": ["a woman, something, fried", "engine, airplane, runs"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["w2M4i1mklOA", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["loud, chime, bell", "a woman, a television program, a bird"], "captions_pred_video": ["footage of an antique clock", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a woman is speaking and a dog is whimpering"], "question": "which entity is quieter", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "winds blows roughly as a vehicle races past"], "sample_ids": ["ukg5L09Wpvo", "xjvTpk2Zpr8"], "start_seconds": ["150", "70"], "properties": ["a train, a horn, a bell", "wind, blows, vehicle"], "captions_pred_video": 
["footage of a train passing through a forest on a dirt road", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["yks4cLgIDMc", "zj2R0XoFr5k"], "start_seconds": ["170", "50"], "properties": ["background, speaking, child", "airplane, boy, fly"], "captions_pred_video": ["footage of two kids wrestling on the floor", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and a child is crying", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["siJFXfGWgDk", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["man, woman, vehicle", "a woman, laughs, animal"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a man speaks while playing a video game on a keyboard", "an airplane engine runs"], "sample_ids": ["tw76HGONaKg", "yVPZ2MNWpms"], "start_seconds": ["570", "0"], "properties": ["A, game, keyboard", "engine, airplane, runs"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of 
zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a car is driving by on the road "], "question": "which is not a video game", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "water pouring and bubbling"], "sample_ids": ["wqADXCzngMw", "uyRfq-jKPpo"], "start_seconds": ["340", "50"], "properties": ["engine, idle, man", "water, bubbles, pouring"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "water is running from 
a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "water flows as men speak and yell"], "sample_ids": ["s6DESzUTGjY", "vJ7JPEFhyLA"], "start_seconds": ["16", "16"], "properties": ["wind, laugh, woman", "water, flow, men"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "an infant crying as a woman laughs"], "sample_ids": ["vddP56-ogds", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["water, splash, person, laugh", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a baby cries and a woman speaks"], "question": "which entity is about a person laughing?", "label": 0}, {"captions": ["some liquid flows while a woman laughs and man talks", "a duck quacks 
continuously"], "sample_ids": ["vddP56-ogds", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["liquid, laughs, man", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "vehicles pass by on a roadway"], "sample_ids": ["slZLHwNbbt4", "tgbONvsP47Y"], "start_seconds": ["300", "0"], "properties": ["clap, distance, horn", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a car is driving on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["some tunes played by whistling", "a person snores loudly multiple times at a close distance"], "sample_ids": ["u6BnG6YZqJ4", "sSMl2vc3ek"], "start_seconds": ["0", "20"], "properties": ["tune, play, whistling", "loud, multiple, distance"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", null], "captions_pred_audio": ["a person whistling a song", "a person snoring loudly"], "question": "which entity is not a tune", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sOa7g-44Dag", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["audio, scratching, man", "multiple, people, yell"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", null], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a crowd of people are talking and 
laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a man speaks as a car is passing by"], "sample_ids": ["uzQnlJXBbOM", "sK4u5T8hW78"], "start_seconds": ["50", "30"], "properties": ["ringing, beep, stop", "a, car, pass"], "captions_pred_video": ["footage of a person using a cell phone on a table", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a telephone rings and a man speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a woman speaks happily and an animal chirps"], "sample_ids": ["uzQnlJXBbOM", "uWAAAL4CIoc"], "start_seconds": ["50", "0"], "properties": ["ringing, beep, stop", "a woman, chirps, animal"], "captions_pred_video": ["footage of a person using a cell phone on a table", null], "captions_pred_audio": ["a telephone rings and a man speaks", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["soTOh3zYJfY", "wqZ135Ssz0"], "start_seconds": ["40", "60"], "properties": ["vehicle, skid, tires", "two men, woman, birds"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", null], "captions_pred_audio": ["a race car accelerates and revs its 
engine ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a drill drills through something then people begin laughing", "a man speaks as a car is passing by"], "sample_ids": ["tEE3MpBt1sg", "sK4u5T8hW78"], "start_seconds": ["50", "30"], "properties": ["drill, something, laugh", "a, car, pass"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["an insect buzzes around continuously", "a woman speaks as she rubs two objects together"], "sample_ids": ["v25l1jef3JY", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["buzzes, continuously, insect", "two objects, woman, speak"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is not a person?", "label": 0}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "an adult male speaks and dials a rotary phone"], "sample_ids": ["wnpJndXuxLc", "tK4VlLsNxak"], "start_seconds": ["50", "120"], "properties": ["blows, vehicle, train", "An adult male speaks, dials, and speaks into a rotary phone"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking and using a sewing machine"], "question": "which entity is a person", "label": 1}, {"captions": ["a dark barks and whimpers", "a woman speaks happily and an animal chirps"], "sample_ids": ["sYj4hpDUZDQ", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["barks, whimpers, dark", "a woman, chirps, animal"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", null], "captions_pred_audio": ["a dog barks and a cat meows", "a woman is speaking and a dog is barking "], "question": "which entity is more calm", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wz7N8YRy74I", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, people", 
"engine, accelerate, idle"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vXlk0lIQBFo", "w5W5Kqtc8E"], "start_seconds": ["470", "100"], "properties": ["wind, speak, vocalize", "wind, blow, vehicle"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", null], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "waves crash against a shoreline and people speak"], "sample_ids": ["ugHJF0hfYkg", "yFB25fqfU8I"], "start_seconds": ["10", "300"], "properties": ["engine, running, continuously", "wave, crash, shoreline"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a man speaks followed by another man speaking outside"], "sample_ids": ["vddP56-ogds", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["water, splash, person, laugh", "two men, speak, follow"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is 
speaking with background noise and breathing sounds "], "question": "which entity has a man speaking nearby?", "label": 0}, {"captions": ["wind blowing followed by a zoom", "someone sprays a liquid onto a hard surface making a hiss sound"], "sample_ids": ["vr8ZXjEBhMQ", "zO-LSSY92ZM"], "start_seconds": ["150", "30"], "properties": ["wind, blow, zoom", "liquid, surface, sound"], "captions_pred_video": ["is taken from a motorcycle's point of view", "youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "steam is hissing and hissing"], "question": "which entity is not a zoom", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["xERFUeZONz8", "vfYTJq7nU"], "start_seconds": ["0", "130"], "properties": ["ring, approach, traffic", "rustling, ducks, quack"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", null], "captions_pred_audio": ["an emergency vehicle siren 
blares", "a duck quacks and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["an audience gives applause as a man yells and a group sings", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["tdWhHV3X25Q", "xKB8O8LTs6s"], "start_seconds": ["60", "70"], "properties": ["applause, audience, yells", "music, gunfire, explosion"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a small musical boom and then birds tweet and a few dogs pant"], "sample_ids": ["uZesmtKZGSw", "y2ZBGpgbhHM"], "start_seconds": ["250", "30"], "properties": ["men, talk, cars", "birds, tweet, pant"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", null], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "birds chirping and a dog panting"], "question": "which entity is more quiet", "label": 1}, {"captions": ["dogs barking and whimpering", "a kid speaks followed by music playing"], "sample_ids": ["tIY7qOV3rEM", "tQWGZLItBXk"], "start_seconds": ["0", "170"], "properties": ["barking, 
whimpering, dog", "music, kid, speak"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "worms revolution screenshots"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity is a human", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "multiple people speak and children yell while water gurgles"], "sample_ids": ["x9JovgqUcs", "vb1fPSDI4c"], "start_seconds": ["500", "30"], "properties": ["a, man, speaks, keyboard", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a keyboard", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["spYNpeN7rPY", "zl9Dqx-j7q4"], "start_seconds": ["1", "6"], "properties": ["a clock, ticktock, man", "engine, laugh, loud"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "ducks quack as a man speaks and makes a duck sound"], "sample_ids": ["uYT5gxnyMWM", "vfYTJq7nU"], "start_seconds": ["50", "130"], "properties": ["female, spraying, scream", "ducks, quack, man"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a duck quacks and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "an electric engine works nearby followed by a child talking"], "sample_ids": ["yeFvk9x0wWI", "xSKJGCItUWE"], "start_seconds": ["30", "10"], "properties": ["chirp, twitter, clatter", "engine, work, child"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front 
of a fence", "footage of the helicopter flying in the room"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a high pitched engine is running and a child speaks"], "question": "which entity is a machine", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["y4tPJXBKDig", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["a, noise, talk", "a woman, laughs, animal"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a woman is speaking and a baby is crying"], "question": "which entity is about a girl talking", "label": 0}, {"captions": ["a sleeping person snores and wheezes", "a man speaks as a car is passing by"], "sample_ids": ["spJCm8tD9Zo", "sK4u5T8hW78"], "start_seconds": ["90", "30"], "properties": ["snores, wheezes, sleeps", "a, car, pass"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zF8yoL0rkbI", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["engine, run, someone", "female, 
spraying, scream"], "captions_pred_video": ["footage of the traffic on the street at night", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "waves crash against a shoreline and people speak"], "sample_ids": ["sQGXqGcwOTc", "yFB25fqfU8I"], "start_seconds": ["3", "300"], "properties": ["cling, speak, dishes", "wave, crash, shoreline"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of a person surfing in the ocean"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["goats bleat and people speak", "a person snores loudly multiple times at a close distance"], "sample_ids": ["z5iUE5h0EPs", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["goats bleat, people speak, language", "loud, multiple, distance"], "captions_pred_video": ["of the goat in the barn", null], "captions_pred_audio": ["a goat bleats and a man speaks", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "a car speeding up in the distance"], "sample_ids": ["sjlVMgdGSK0", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["accelerates, vehicle, race car", "distance, car, speed"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a race car accelerates and revs its engine "], "question": "which car is speeding up in the distance", "label": 1}, {"captions": ["a machine beeps continuously", "wind blowing 
followed by a zoom"], "sample_ids": ["y682ml90jGw", "vr8ZXjEBhMQ"], "start_seconds": ["11", "150"], "properties": ["beeps, machine, continuously", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a beeping sound is being made ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["goats bleat and people speak", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["z5iUE5h0EPs", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["goats bleat, people speak, language", "loud, laughter, intermittent"], "captions_pred_video": ["of the goat in the barn", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a goat bleats and a man speaks", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an electric engine works nearby followed by a child talking", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["xSKJGCItUWE", "yDoT73BWsdA"], "start_seconds": ["10", "10"], "properties": ["engine, work, child", "engine, revs, vehicle"], "captions_pred_video": ["footage of the helicopter flying in the room", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["people speak and tapping occurs", "a man speaks while a machine runs before a smoke alarm beeps"], "sample_ids": ["tFCUUGdREgA", "sG7TyPnFDR0"], "start_seconds": ["70", "180"], "properties": ["people, tap, speak", "beeps, machine, smoke alarm"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "a person is using an espresso machine in a restaurant"], "captions_pred_audio": ["a man is speaking and walking with 
wind noise in the background ", "a man is speaking and a microwave oven is beeping "], "question": "which entity has a smoke alarm beep?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "water flows as men speak and yell"], "sample_ids": ["tOSWIURC-4", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["engine, work, nearby", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a lawn mower is running ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wTjoRj1se3U", "sSMl2vc3ek"], "start_seconds": ["390", "20"], "properties": ["engine, run, people", "loud, multiple, distance"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", null], "captions_pred_audio": ["a jet engine is running and people are talking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks and dog vocalizes", "winds blows roughly as a vehicle races past"], "sample_ids": ["uWAAAL4CIoc", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["a, dog, vocalize", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["uPDn2BFTHk", "sQGXqGcwOTc"], "start_seconds": ["140", "3"], "properties": ["woman, laughs, speaks", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], 
"captions_pred_audio": ["a baby laughs and a woman speaks", "mechanisms are operating and water is splashing "], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["birds chirp as a bell rings", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["ziUT9IFTkjg", "wqZ135Ssz0"], "start_seconds": ["10", "60"], "properties": ["chirp, bell, ring", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is about birds?", "label": 0}, {"captions": ["a goat screams and people speak in the background", "water flows as men speak and yell"], "sample_ids": ["xC8kbrKJmco", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["background, goat, scream", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a goat is bleating ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "someone whistles a tune"], "sample_ids": ["yks4cLgIDMc", "sIXTftIuUgw"], "start_seconds": ["170", "90"], "properties": ["background, speaking, child", "someone, tune, whistle"], "captions_pred_video": ["footage of two kids wrestling on the floor", null], "captions_pred_audio": ["a man is speaking and a child is crying", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wqN6IIHw3po", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["rain, surface, fall", "applause, audience, yells"], "captions_pred_video": ["in your own words what is 
happening in this screenshot? blood splattered all over the place", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and water is splashing", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["birds tweet and squawk", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["w1mlz3Pe4fU", "yDoT73BWsdA"], "start_seconds": ["300", "10"], "properties": ["squawk, tweet, scream", "engine, revs, vehicle"], "captions_pred_video": ["of a bird in a cage", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["birds are chirping and singing", "a race car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["two women and a man talk while a kid cries", "vehicle tires screech and a man speaks before a car door opens"], "sample_ids": ["wyllXV6PjKo", "sxYkFKFIZD0"], "start_seconds": ["30", "20"], "properties": ["a kid, talk, cry", "screech, man, door"], "captions_pred_video": [null, "2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking while a car is revving and accelerating with a squeal in the background "], "question": "which entity has a door open?", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "a infant makes noise and is excited"], "sample_ids": ["sDSppXIlJrs", "wIJK3-5y0kA"], "start_seconds": ["27", "30"], "properties": ["microphone, water, wind", "noise, excited, infant"], "captions_pred_video": ["a man is paddling a small wooden boat in 
the water", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a baby cries and a woman speaks"], "question": "which noise is made by a human", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["yDoT73BWsdA", "zj2R0XoFr5k"], "start_seconds": ["10", "50"], "properties": ["engine, revs, vehicle", "airplane, boy, fly"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zhx6hoYrHeI", "tiDFTC-5vU"], "start_seconds": ["160", "30"], "properties": ["engine, sputter, rough", "male, duck, laugh"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "paper is crumpling consistently"], "sample_ids": ["vmrxwuAMb2I", "v5cSxLaHADY"], "start_seconds": ["40", "0"], "properties": ["a dog, inhales, exhales", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a dog barks and growls", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a 
propeller rotates loudly and intensely"], "sample_ids": ["ugHJF0hfYkg", "ugHJF0hfYkg"], "start_seconds": ["10", "10"], "properties": ["loud, intense, propeller", "loud, intense, propeller"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a helicopter is flying overhead ", "a helicopter is flying overhead "], "question": "which propeller rotates loudly and intensely", "label": 1}, {"captions": ["small dogs yip and bark sharply", "water quietly rushes by while birds chirp in the background"], "sample_ids": ["v-wcQf4BDY0", "sYITalLZjj4"], "start_seconds": ["120", "30"], "properties": ["bark, yip, sharply", "water, rushes, background, birds"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "two ducks are swimming in the water near each other"], "captions_pred_audio": ["a dog barks and growls", "wind blows and birds chirp"], "question": "which entity is quieter", "label": 1}, {"captions": ["goats bleat and people speak", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["z5iUE5h0EPs", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["goats bleat, people speak, language", "female, spraying, scream"], "captions_pred_video": ["of the goat in the barn", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a goat bleats and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a person is whistling a tune", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["scYRUkrFLiQ", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["a, tune, whistle", "People, motor, brakes"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", null], "captions_pred_audio": ["a person whistling a song", "a man is speaking 
while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 0}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a machine engine runs and a man speaks"], "sample_ids": ["xKB8O8LTs6s", "vs65y4qmyBE"], "start_seconds": ["70", "340"], "properties": ["music, radio, gunshots", "engine, run, man"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a car is engulfed in flames on the side of the road"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a heavy engine is running and men are speaking "], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "a man speaks with another voice speaking in the background"], "sample_ids": ["uiItxDsDMFI", "u21-Z5gJCB8"], "start_seconds": ["30", "30"], "properties": ["wood, piece, saw", "background, voice, man"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["a saw is being used with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a woman sneezes then speaks", "water flows and trickles"], "sample_ids": ["x4dZyf9Gbj0", "tB7hWb9gTuQ"], "start_seconds": ["130", "30"], "properties": ["sneezes, speaks, woman", "water, flow, trickle"], "captions_pred_video": ["footage is blurry and out of focus", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman sneezes and speaks", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a jet engine roars, almost making a man inaudible"], 
"sample_ids": ["yYEVLuqEytU", "xfaoyyzw2WU"], "start_seconds": ["40", "180"], "properties": ["animal, pig, background", "loud, jet engine, roar"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["several sheep bleat and a man speaks", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "a person is snoring while sleeping"], "sample_ids": ["v5P-ThUCINM", "vJrjSeP17yE"], "start_seconds": ["400", "40"], "properties": ["background, chirp, bird", "a person is sleeping, snoring, person"], "captions_pred_video": [null, "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a person snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["some men converse over an engine running", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sCiy7QS1U", "uYT5gxnyMWM"], "start_seconds": ["300", "50"], "properties": ["men, converse, engine", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["two frogs croak at each other", "some tunes played by whistling"], "sample_ids": ["zg0X6BnhOLQ", "u6BnG6YZqJ4"], "start_seconds": ["410", "0"], "properties": ["two frogs, croak, at each other", "tune, play, whistling"], "captions_pred_video": ["footage of lightning in the sky at night", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a frog is croaking", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a 
woman talking as an infant is crying", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tMbMDvT50j8", "sSMl2vc3ek"], "start_seconds": ["12", "20"], "properties": ["a, talk, infant", "loud, multiple, distance"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "a stream of water runs briefly"], "sample_ids": ["zF8yoL0rkbI", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["engine, run, someone", "stream, water, run"], "captions_pred_video": ["footage of the traffic on the street at night", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a clock ticktocks in wind", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["yVumC9TGknc", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["ticktocks, clock, wind", "loud, laughter, intermittent"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a series of beeps and chirps", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["there are rhythmical snoring nearby", "a frog croaks as other frogs croak in the background"], "sample_ids": ["ujMt0-D-x2k", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["snoring, rhythmical, nearby", "background, frog, croak"], "captions_pred_video": ["of the dog playing with a toy on the floor", "a close up of a frog in the water"], 
"captions_pred_audio": ["a person is snoring loudly", "a frog is croaking"], "question": "which entity is not a frog?", "label": 0}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "an infant crying as a woman laughs"], "sample_ids": ["x5cuQjOdM3E", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["cat, talk, meow", "a, laugh, infant"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a cat meows and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity is crying", "label": 1}, {"captions": ["an animal quacks rapidly", "a machine beeps continuously"], "sample_ids": ["vh30P49Po6s", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["animal, quacks, rapidly", "beeps, machine, continuously"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "an airplane engine spools and people speak"], "sample_ids": ["sQwlkXjQabo", "wTjoRj1se3U"], "start_seconds": ["10", "390"], "properties": ["liquid, surface, spray", "airplane, engine, spool"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["spraying followed by silence", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vXlk0lIQBFo", "wDVMhEdTiVw"], "start_seconds": ["470", "30"], "properties": ["wind, speak, vocalize", "gun, shoot, water"], "captions_pred_video": ["- a woman and two 
donkeys in a fenced in area", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sxYkFKFIZD0", "su6FAOcOA8c"], "start_seconds": ["20", "4"], "properties": ["screech, man, door", "engine, idle, woman"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["water rushes by", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["x-PeY8Yb8M4", "xKB8O8LTs6s"], "start_seconds": ["300", "70"], "properties": ["water, rushes, by", "music, gunfire, explosion"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a car is driving on a wet road ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a small engine spits as it runs", "a woman speaks as she rubs two objects together"], "sample_ids": ["sZvwOuuPGP0", "vzxHnu-SFEw"], "start_seconds": ["50", "80"], 
"properties": ["spits, engine, runs", "two objects, woman, speak"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a medium engine is running ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is not a person?", "label": 0}, {"captions": ["water quietly rushes by while birds chirp in the background", "a stream of water runs briefly"], "sample_ids": ["sYITalLZjj4", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["water, rushes, background, birds", "stream, water, run"], "captions_pred_video": ["two ducks are swimming in the water near each other", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["wind blows and birds chirp", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vs65y4qmyBE", "uZesmtKZGSw"], "start_seconds": ["340", "250"], 
"properties": ["engine, run, man", "men, talk, cars"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a helicopter engine idles continuously", "paper is crumpling consistently"], "sample_ids": ["ugHJF0hfYkg", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["engine, idle, continuously", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a helicopter is flying overhead ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a machine runs continuously", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["wdXV3Pv0jiY", "zl9Dqx-j7q4"], "start_seconds": ["11", "6"], "properties": ["machine, running, continuously", "engine, laugh, loud"], "captions_pred_video": ["footage is blurry and shaky", "footage of a man driving a car in the dark"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a jet engine roars "], "question": "which entity is not a machine?", "label": 1}, {"captions": ["a dog barks and whimpers", "a 
propeller rotates loudly and intensely"], "sample_ids": ["sShpyu2l4YQ", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["barks, whimpers, dog", "loud, intense, propeller"], "captions_pred_video": ["the puppies are playing with a toy", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a dog is barking and growling", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["uKCSGgof8gI", "uYT5gxnyMWM"], "start_seconds": ["12", "50"], "properties": ["chirps, distance, signal", "female, spraying, scream"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "a duck quacks continuously"], "sample_ids": ["wjsXBsc7M40", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "quacks, continuously, duck"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a person speaks briefly", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["zOZleIRqZm4", "tDVADusiIoc"], "start_seconds": ["80", "60"], "properties": ["person, talk, brief", "water, radio, man"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a 
man is speaking with crickets chirping in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is talking", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "a stream of water flows as people talk and wind blows"], "sample_ids": ["tw76HGONaKg", "xBxDz0CFVn0"], "start_seconds": ["570", "30"], "properties": ["A, game, keyboard", "stream, water, flow"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage is blurry and out of focus"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["water flows followed by women screaming", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["w5W5Kqtc8E", "zj2R0XoFr5k"], "start_seconds": ["100", "50"], "properties": ["water, flow, women", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", 
"a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["ylpYOorfH4o", "ziUT9IFTkjg"], "start_seconds": ["410", "10"], "properties": ["engine, run, loud", "background, birds, rustling"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "birds are chirping and a chime is ringing "], "question": "which entity is quieter", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "paper folding and crinkling"], "sample_ids": ["u7C-AEBQM", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["ticks, rhythmic, quiet", "paper, fold, crinkle"], "captions_pred_video": [null, "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a 
valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a ticktock of a clock", "the wind blows and a mouse clicks "], "question": "which entity is not quiet", "label": 1}, {"captions": ["a motorcycle engine is idling", "a car speeding up in the distance"], "sample_ids": ["vZAqdHZ81yA", "u0TrcHhkPQ"], "start_seconds": ["180", "20"], "properties": ["engine, motorcycle, idling", "distance, car, speed"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", null], "captions_pred_audio": ["an engine is idling loudly", "a race car accelerates and revs its engine "], "question": "which object is moving faster", "label": 0}, {"captions": ["an electric engine works nearby followed by a child talking", "waves crash against a shoreline and people speak"], "sample_ids": ["xSKJGCItUWE", "yFB25fqfU8I"], "start_seconds": ["10", "300"], "properties": ["engine, work, child", "wave, crash, shoreline"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a man speaks as horns blow", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tHyNqRyK34A", "vYkA3cfXp5Q"], "start_seconds": ["24", "30"], "properties": ["a, man, speaks", "engine, accelerate, idle"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "footage of a car driving down the street on a sunny day"], 
"captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "wind blows as people chatter quietly"], "sample_ids": ["tqR406bGiE", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["flush, water, gurgle", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "dishes cling together then a man begins to speak"], "sample_ids": ["sDSppXIlJrs", "sQGXqGcwOTc"], "start_seconds": ["27", "3"], "properties": ["microphone, water, wind", "cling, speak, dishes"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["the wind is blowing and water is splashing", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "people applaud and hoot and chat quietly"], "sample_ids": ["xZepNM9qcRA", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["background, motor, run", "people, applaud, hoot"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["material crumbles into a microphone", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vofpvUo6NAw", "wDVMhEdTiVw"], "start_seconds": ["220", "30"], "properties": ["material, crumbles, 
microphone", "gun, shoot, water"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "a infant makes noise and is excited"], "sample_ids": ["zliInBdC98Y", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["a, baby, cries, wails", "noise, excited, infant"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a baby cries and a woman speaks", "a baby cries and a woman speaks"], "question": "which is a more active infant", "label": 1}, {"captions": ["someone snores nearby", "multiple people speak and children yell while water gurgles"], "sample_ids": ["spJCm8tD9Zo", "vb1fPSDI4c"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "multiple, people, yell"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a person burps loudly for a long time nearby", "birds chirp and objects are moved around"], "sample_ids": ["vf44CgrjT0A", "yPUYU6t3rwo"], "start_seconds": ["20", "370"], "properties": ["loud, long, person", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a loud burp", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["motors rev and run loudly 
as a person laughs", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zl9Dqx-j7q4", "vb1fPSDI4c"], "start_seconds": ["6", "30"], "properties": ["motors rev, laugh, loudly", "multiple, people, yell"], "captions_pred_video": ["footage of a man driving a car in the dark", null], "captions_pred_audio": ["a jet engine roars ", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tgbONvsP47Y", "w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["noise, truck, accelerate", "wind, blow, vehicle"], "captions_pred_video": ["footage of a fire truck entering a garage", null], "captions_pred_audio": ["a car is driving on the road ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a man speaks uses a drill"], "sample_ids": ["wvKpEYswXO0", "x5eIC7S0fbg"], "start_seconds": ["150", "60"], "properties": ["sound, water, running", "A man is speaking, uses a drill, and is a tool"], "captions_pred_video": ["of the person preparing food in the kitchen", "a person in surgical gloves is using a needle to remove a small object from a tooth"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking and using a power tool "], "question": "which entity is a tool", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "heavy rain splashes as it falls"], "sample_ids": ["uZesmtKZGSw", "wP8ZKrlx3oA"], "start_seconds": ["250", "40"], "properties": ["car, track, man", "fall, rain, splash"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet 
holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a heavy rain is falling on a surface"], "question": "which entity is not a person", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "a horn rings out as a machine runs by"], "sample_ids": ["sjlVMgdGSK0", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["accelerates, vehicle, race car", "a, horn, run"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "an engine runs loudly"], "sample_ids": ["y8WEcpOlT3I", "vqZuVbG6-HI"], "start_seconds": ["40", "130"], "properties": ["harsh, wind, blows", "loud, engine, run"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "various birds chirp and squeal, and an animal 
grunts"], "sample_ids": ["yks4cLgIDMc", "tDlysoZiA1I"], "start_seconds": ["170", "0"], "properties": ["background, speaking, child", "animal, grunts, chirps"], "captions_pred_video": ["footage of two kids wrestling on the floor", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a man is speaking and a child is crying", "birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "small dogs yip and bark sharply"], "sample_ids": ["wz7N8YRy74I", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["rooster, crow, background, people", "bark, yip, sharply"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a train horn blows as it passes by", "a baby laugh at a sputter"], "sample_ids": ["zVacuqSb4LI", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["horn, blows, train", "laugh, sputter, baby"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by 
mike amstong a video by mike amstong a video by mike amstong a video by", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is a person", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a woman speaks as she rubs two objects together"], "sample_ids": ["sNB8zxXneIM", "vzxHnu-SFEw"], "start_seconds": ["20", "80"], "properties": ["several, quack, cocks", "two objects, woman, speak"], "captions_pred_video": ["a group of geese in a cage", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["birds chirp as a train approaches", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["xM4joTqDVp4", "zFjIWfSD-4"], "start_seconds": ["160", "410"], "properties": ["bird, chirp, train", "People, motor, brakes"], 
"captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", null], "captions_pred_audio": ["birds are chirping and a train is moving ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a train?", "label": 0}, {"captions": ["a duck quacks continuously", "continuous snoring"], "sample_ids": ["vh30P49Po6s", "sLkeqCDJIyw"], "start_seconds": ["30", "120"], "properties": ["quacks, continuously, duck", "loud, snoring, noise"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", ", what is the man doing on the couch? sleeping"], "captions_pred_audio": ["a duck is quacking loudly", "a person is snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["birds fly and flutter around", "frogs croak and vocalize"], "sample_ids": ["wGKgwOP3h30", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["fly, flutter, around", "croak, vocalize, frog"], "captions_pred_video": ["of the pigeons in the coop", "a close up of a frog in the water"], "captions_pred_audio": ["pigeons coo and flap their wings", "a frog is croaking"], "question": "which animal is more likely to be a frog", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "an infant crying frantically"], "sample_ids": ["zl9Dqx-j7q4", "zwOBqeFTgiU"], "start_seconds": ["6", "30"], "properties": ["engine, laugh, loud", "cry, infant, frantically"], "captions_pred_video": ["footage of a man driving a car in the dark", "of the baby crying in the car seat"], "captions_pred_audio": ["a jet engine roars ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a toilet flushes and a female speaks"], "sample_ids": ["xBxDz0CFVn0", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["stream, water, flow", 
"female, flushes, toilet"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a toilet flushes and a man speaks"], "question": "which entity is a source of water", "label": 0}, {"captions": ["a man talks as something metal hits against and glass is set down", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["x6ijhqRY38s", "w5W5Kqtc8E"], "start_seconds": ["250", "100"], "properties": ["something metal, glass, hit", "wind, blow, vehicle"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["some clanking with distant murmuring", "some tunes played by whistling"], "sample_ids": ["uMTTDZ2mb4", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["clanking, murmuring, distant", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a person whistling a song"], "question": "which entity is played by whistling", "label": 1}, {"captions": ["a male speaks and another male speaks", "a toilet flushes and a female speaks"], "sample_ids": ["viuTg1M-dqg", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["two males, speaking, male", "female, flushes, toilet"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 
1}, {"captions": ["people speak softly as food sizzles", "some tunes played by whistling"], "sample_ids": ["yhQ2Lg-7qDY", "u6BnG6YZqJ4"], "start_seconds": ["130", "0"], "properties": ["food, sizzle, speak", "tune, play, whistling"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yVumC9TGknc", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["humming, clock, birds", "a woman, a television program, a bird"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a series of beeps and chirps", "a woman is speaking and a dog is whimpering"], "question": "which entity has a clock?", "label": 0}, {"captions": ["a man talks as something metal hits against and glass is set down", "a man speaks while rain falls onto a hard surface"], "sample_ids": ["x6ijhqRY38s", "wqN6IIHw3po"], "start_seconds": ["250", "30"], "properties": ["something metal, glass, hit", "rain, surface, fall"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "in your own words what is happening in this screenshot? 
blood splattered all over the place"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking and water is splashing"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a clock ticktocks continuously", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vlJS7LN2XyM", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks continuously", "multiple, people, yell"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["an adult woman and an adult man speak", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zTLVJCo4WEE", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["two people, adult, speak", "female, spraying, scream"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a woman and a man speaking?", "label": 0}, {"captions": ["an engine runs and wind blows", "birds chirp and objects are moved around"], "sample_ids": ["vs65y4qmyBE", "yPUYU6t3rwo"], "start_seconds": ["340", "370"], "properties": ["engine, run, wind", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "insects buzz and a man speaks"], "question": "which entity is moving around objects", "label": 1}, {"captions": ["a man 
speaks while a machine runs before a smoke alarm beeps", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sG7TyPnFDR0", "wqZ135Ssz0"], "start_seconds": ["180", "60"], "properties": ["beeps, machine, smoke alarm", "two men, woman, birds"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", null], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["the revving of an engine throttle followed by a man speaking", "a vehicle accelerates squealing tires"], "sample_ids": ["tezvROoo4bs", "sd7xVssqlw"], "start_seconds": ["40", "50"], "properties": ["audio, throttle, speaking", "accelerates, tires, squealing"], "captions_pred_video": ["footage of a busy city street with cars parked on both sides of the road", null], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a duck quacks several times", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vh30P49Po6s", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["quacks, duck, several", "gun, shoot, water"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a duck is quacking loudly", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be shot", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "people speak as gunfire rings out"], "sample_ids": ["wqADXCzngMw", "wqTCwqVRDlk"], "start_seconds": ["340", "80"], "properties": ["engine, idle, man", "gunfire, ring, speak"], "captions_pred_video": ["of a man working on a vintage volkswagen 
beetle", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tOSWIURC-4", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["engine, work, nearby", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a lawn mower is running ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a duck quacks several times"], "sample_ids": ["zcDwZ6W7E3E", "vh30P49Po6s"], "start_seconds": ["180", "30"], "properties": ["man, speak, motorcycles", "quacks, duck, several"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a duck is quacking loudly"], "question": "which entity is a single action", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["xyL9F5VrjkE", "tiDFTC-5vU"], "start_seconds": ["20", "30"], "properties": ["wind, motor, distance", "male, duck, laugh"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a child yells and another yells", "dishes cling 
together then a man begins to speak"], "sample_ids": ["vMDHu7Lxcgw", "sQGXqGcwOTc"], "start_seconds": ["410", "3"], "properties": ["two, yell, child", "cling, speak, dishes"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "a woman speaks as she rubs two objects together"], "sample_ids": ["xzKKf9bKNUo", "vzxHnu-SFEw"], "start_seconds": ["10", "80"], "properties": ["background, noise, snoring", "two objects, woman, speak"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", 
"an engine runs loudly"], "sample_ids": ["w5W5Kqtc8E", "vqZuVbG6-HI"], "start_seconds": ["100", "130"], "properties": ["wind, blow, vehicle", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a lawn mower is running and men are speaking "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a motorcycle engine works nearby", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["tOSWIURC-4", "yDoT73BWsdA"], "start_seconds": ["0", "10"], "properties": ["engine, work, nearby", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a lawn mower is running ", "a race car accelerates and revs its engine "], "question": "which entity has a vehicle passing by?", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "metal clacking as food and oil sizzles followed by a woman talking"], "sample_ids": ["zcDwZ6W7E3E", "vW4x7S1VfQc"], "start_seconds": ["180", "150"], "properties": ["a, man, speak", "clacking, oil, woman"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a person cooking fish in a frying pan on a stove top"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "food sizzles in a frying pan"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a mechanical buzzing getting louder", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["sEprKHm8Sj8", "ukg5L09Wpvo"], "start_seconds": ["90", "150"], "properties": ["noise, loud, buzzing", "clickety-clack, train, whistle"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 
2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a train blows its whistle and blows its horn "], "question": "which noise is continuous", "label": 1}, {"captions": ["some men converse over an engine running", "someone snores nearby"], "sample_ids": ["sCiy7QS1U", "spJCm8tD9Zo"], "start_seconds": ["300", "90"], "properties": ["men, converse, engine", "someone snores, nearby, someone"], "captions_pred_video": [null, "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wsHBIgzs9Fs", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["horn, continuous, buzzing", "music, gunfire, explosion"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "water flows and trickles"], "sample_ids": ["v7jJS8aAyA", "tB7hWb9gTuQ"], "start_seconds": ["10", "30"], "properties": ["wind, blows, loudly", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "water is 
splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "a wooden clack accompanies nearby chirping birds"], "sample_ids": ["tw76HGONaKg", "yeFvk9x0wWI"], "start_seconds": ["570", "30"], "properties": ["A, game, keyboard", "clack, bird, chirp"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "birds chirp in the background as a car drives by "], "question": "which entity is not a video game", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wfHeoPDLMaM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["quacking, squawking, ducks", "stream, water, flow"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a 
barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage is blurry and out of focus"], "captions_pred_audio": ["ducks are quacking", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["people speak and tapping occurs", "some men converse over an engine running"], "sample_ids": ["tFCUUGdREgA", "sCiy7QS1U"], "start_seconds": ["70", "300"], "properties": ["people, tap, speak", "men, converse, engine"], "captions_pred_video": ["a person riding a white horse in an indoor arena", null], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows people speaking and tapping occurs?", "label": 0}, {"captions": ["people speak and tapping occurs", "wind blowing followed by a zoom"], "sample_ids": ["tFCUUGdREgA", "vr8ZXjEBhMQ"], "start_seconds": ["70", "150"], "properties": ["people, tap, speak", "wind, blow, zoom"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to blow", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a man speaks as a motor runs in the background"], "sample_ids": ["zk-xJGQU8-4", 
"xZepNM9qcRA"], "start_seconds": ["130", "30"], "properties": ["food, man, woman", "background, motor, run"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a dark barks and whimpers"], "sample_ids": ["sWZzXuWYY", "sYj4hpDUZDQ"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "barks, whimpers, dark"], "captions_pred_video": [null, "a brown and white dog standing in front of a wall with its mouth open"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a dog barks and a cat meows"], "question": "which entity is quieter", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "water pouring and bubbling"], "sample_ids": ["vXlk0lIQBFo", "uyRfq-jKPpo"], "start_seconds": ["470", "50"], "properties": ["wind, talk, vocalize", "water, bubbles, pouring"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro 
puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a train horn sounds and railroad crossing ring", "paper is crumpling consistently"], "sample_ids": ["s7knHCFW82w", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["horn, sound, train", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a person screams glaringly"], "sample_ids": ["sNB8zxXneIM", "xC8kbrKJmco"], "start_seconds": ["20", "0"], "properties": ["several, quack, cocks", "glaringly, screams, person"], "captions_pred_video": ["a group of geese in a cage", null], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a goat is bleating "], "question": "which entity is a person?", "label": 1}, {"captions": ["motors runs briefly and tires screech", "several insects fly while two men talk"], "sample_ids": ["yRx9txMcBl0", "s-T9OVOiMLo"], "start_seconds": ["40", "330"], "properties": ["motors, tires, screech", "several, fly, men"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 
5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a vehicle?", "label": 0}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "small dogs yip and bark sharply"], "sample_ids": ["vW4x7S1VfQc", "v-wcQf4BDY0"], "start_seconds": ["150", "120"], "properties": ["clacking, oil, woman", "bark, yip, sharply"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["food sizzles in a frying pan", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "gunshots ring out, a man yells, and more shots follow"], "sample_ids": ["yYEVLuqEytU", "vKrYfzleLB8"], "start_seconds": ["40", "110"], "properties": ["animal, pig, background", "a, ring, gunshots"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "stock footage of a person holding a gun in their hand"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a man is speaking with background noise and a cap gun is fired "], "question": "which entity has more gunshots", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "birds chirp and objects are moved around"], "sample_ids": ["y1saVTXsKwc", "yPUYU6t3rwo"], "start_seconds": ["80", "370"], "properties": ["a, dog, talk", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a dog playing with a pink ball", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], 
"captions_pred_audio": ["a dog barks and a man speaks", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "some men converse over an engine running"], "sample_ids": ["vuUVPzd2FXw", "sCiy7QS1U"], "start_seconds": ["160", "300"], "properties": ["a, steam, release", "men, converse, engine"], "captions_pred_video": ["of the person cooking on the grill with a spatula", null], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows a man talking?", "label": 0}, {"captions": ["a jet engine roars, almost making a man inaudible", "a man speaks as a boat engine runs"], "sample_ids": ["xfaoyyzw2WU", "wtDqrBygTcU"], "start_seconds": ["180", "30"], "properties": ["loud, jet engine, roar", "man, engine, run"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "shows a person riding on the back of a boat as it speeds through the water"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a man is speaking and a motor is running"], "question": "which engine is running", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "birds chirp and objects are moved around"], "sample_ids": ["y2bVZ7rz-5M", "yPUYU6t3rwo"], "start_seconds": ["280", "370"], "properties": ["motor noise, horn, siren", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "three men talk while wind blows and some liquid 
flows"], "sample_ids": ["rqu8iB22IY", "vJ7JPEFhyLA"], "start_seconds": ["5", "16"], "properties": ["sound, repeats, laugh", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man talking?", "label": 1}, {"captions": ["a person is burping while a girl speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vdoxuJn9lTc", "vbZ-0lGPneg"], "start_seconds": ["40", "30"], "properties": ["person, burp, girl", "a woman, a television program, a bird"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a child speaks followed by a burp", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird nearby?", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a vehicle engine accelerates and wind blows"], "sample_ids": ["uzQnlJXBbOM", "wudZTNBtVqc"], "start_seconds": ["50", "60"], "properties": ["ringing, beep, stop", "accelerates, engine, wind"], "captions_pred_video": ["footage of a person using a cell phone on a table", "footage is of a parking lot with cars parked in it"], "captions_pred_audio": ["a telephone rings and a man speaks", "a car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man talks as several small engines run", "a woman and man are speaking"], "sample_ids": ["u9A6VZQCZpU", "vbpKkWvfOu4"], "start_seconds": ["30", "560"], "properties": ["a, man, talk", "two people, speaking, woman, man"], "captions_pred_video": [null, "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "tapping occurs then a baby cries"], "sample_ids": ["weDbePuc-Xc", "wIJK3-5y0kA"], "start_seconds": ["40", "30"], "properties": ["cartoon character, music, vocalize", "a, cry, baby"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a baby cries and a woman speaks"], "question": "which entity is a baby?", "label": 1}, {"captions": ["a toilet flushes and water drains", "an infant crying as a woman laughs"], "sample_ids": ["sfAvvZwdLCY", "xhmRY9yhC7c"], "start_seconds": ["20", "20"], "properties": ["water drains, flushes, water", "a, laugh, infant"], "captions_pred_video": ["footage of the toilet in the bathroom", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a toilet is flushed", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["xvDdE3zNf8Y", "x9JovgqUcs"], "start_seconds": ["120", "500"], "properties": ["a, female, speaks", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", null], "captions_pred_audio": ["a woman speaks and crumples paper", "a man speaks and types on a keyboard"], "question": "which entity is speaking", "label": 1}, {"captions": 
["water splashes and wind noise is made into a microphone", "repeated tapping is accompanied by water running and a woman speaking softly"], "sample_ids": ["sDSppXIlJrs", "wvKpEYswXO0"], "start_seconds": ["27", "150"], "properties": ["microphone, water, wind", "sound, water, running"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "of the person preparing food in the kitchen"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a woman is speaking and tapping with background noise and water running "], "question": "which entity has water running?", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["xSKJGCItUWE", "wwyfGO2J4"], "start_seconds": ["10", "90"], "properties": ["engine, run, boy", "people, applaud, hoot"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a dark barks and whimpers", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sYj4hpDUZDQ", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["barks, whimpers, dark", "stream, water, flow"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "footage is blurry and out of focus"], "captions_pred_audio": ["a dog barks and a cat meows", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "people cheer as a vehicle engine revs"], "sample_ids": ["wvKpEYswXO0", "xjhAnI2q6hM"], "start_seconds": ["150", "6"], "properties": ["water, tap, run", "engine revs, vehicle, people"], "captions_pred_video": ["of the person preparing 
food in the kitchen", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["birds chirp as a bell rings", "a duck quacks continuously"], "sample_ids": ["ziUT9IFTkjg", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["chirp, bell, ring", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 0}, {"captions": ["food is frying then a woman speaks", "birds chirp as a man speaks and a younger person speaks"], "sample_ids": ["ukxt9I7eMMg", "xl2PIWyXaM"], "start_seconds": ["30", "160"], "properties": ["food, woman, speak", "chirp, man, younger person"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "birds are chirping and people are talking"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["speaking following by laughing and clapping", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["u2f5NpsoHBg", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["person, laugh, clap", "a woman, laughs, animal"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a woman is speaking and a baby is crying"], "question": "which entity has a person speaking and laughing and clapping?", "label": 0}, {"captions": ["pigeons vocalize and birds chirp", "a clock 
ticktocks"], "sample_ids": ["uiS58TNyUiw", "v-g-j2uTByM"], "start_seconds": ["430", "30"], "properties": ["vocalize, bird, chirp", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of the pigeon in the cage", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["water bubbles and gurgles.", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tB7hWb9gTuQ", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["bubbles, gurgles, water", "female, spraying, scream"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["water is splashing and gurgling", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking and spraying?", "label": 1}, {"captions": ["an airplane engine spools and people speak", "water pouring and bubbling"], "sample_ids": ["wTjoRj1se3U", "uyRfq-jKPpo"], "start_seconds": ["390", "50"], "properties": ["airplane, engine, spool", "water, bubbles, pouring"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a jet engine is running and people are talking", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["women speak and laugh as wind blows", "pigeons vocalize and birds chirp"], "sample_ids": ["un9VQlzgZM", "uiS58TNyUiw"], "start_seconds": ["5", "430"], "properties": ["wind, speak, laugh", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["y8dSeubCNI", "yajyRTUQk3U"], "start_seconds": ["4", "400"], "properties": ["men, women, car", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["an engine revving and people talking in the background", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["continuous sneezing together with speech", "a stream of water runs briefly"], "sample_ids": ["x4dZyf9Gbj0", "x-PeY8Yb8M4"], "start_seconds": ["130", "300"], "properties": ["continuous, sneeze, speech", "stream, water, run"], "captions_pred_video": ["footage is blurry and out of focus", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman sneezes and speaks", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "a woman talks while something is fried 
and objects are tapped"], "sample_ids": ["yI-KvObbDoY", "yajyRTUQk3U"], "start_seconds": ["260", "400"], "properties": ["sound, smack, wind", "a woman, something, fried"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "- a woman cooking in the kitchen"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "multiple people speak and children yell while water gurgles"], "sample_ids": ["uEU-Hg5MTN8", "vb1fPSDI4c"], "start_seconds": ["27", "30"], "properties": ["a woman, laughs, animal", "multiple, people, yell"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tDVADusiIoc", "uZesmtKZGSw"], "start_seconds": ["60", "250"], "properties": ["wind, radio, waves", "men, talk, cars"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a 
man is speaking while the wind is blowing and water is splashing", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "a duck quacks loudly and continuously"], "sample_ids": ["w2JXXIAdUdg", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["emits, sleeping, person", "loud, continuous, quacks"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a duck quacks continuously"], "sample_ids": ["vcmWSmvti8", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["music, man, fire", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "an airplane engine runs"], "sample_ids": ["un9VQlzgZM", "yVPZ2MNWpms"], "start_seconds": ["5", "0"], "properties": ["females, talk, laugh", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a car is driving by on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "a person speaks over rustling leaves"], "sample_ids": ["rwTERCUno", "zOZleIRqZm4"], "start_seconds": ["90", "80"], "properties": ["engine, idle, sputter", "rustling, 
leaves, person"], "captions_pred_video": [null, "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking with crickets chirping in the background"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a horn honks and then loudly blares", "winds blows roughly as a vehicle races past"], "sample_ids": ["wnpJndXuxLc", "xjvTpk2Zpr8"], "start_seconds": ["50", "70"], "properties": ["horn, honk, loud", "wind, blows, vehicle"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a dog whimpers and a woman briefly talks", "a man laughs and speaks as cats purr and hiss"], "sample_ids": ["y1saVTXsKwc", "vVhthZ45k3Y"], "start_seconds": ["80", "30"], "properties": ["a, dog, talk", "cat, purr, hiss"], "captions_pred_video": ["a dog playing with a pink ball", "footage is blurry and out of focus"], "captions_pred_audio": ["a dog barks and a man speaks", "a man is speaking and a cat is meowing"], "question": "which animal is more active", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tEE3MpBt1sg", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["drill, something, laugh", "a woman, something, fried"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "- a woman cooking in the kitchen"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a muffled toilet 
flushes and the water drains", "a horn blasts as warning bells ring"], "sample_ids": ["sfAvvZwdLCY", "zgUgkpk78xU"], "start_seconds": ["20", "70"], "properties": ["flushes, drains, water", "horn, bells, ring"], "captions_pred_video": ["footage of the toilet in the bathroom", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a toilet is flushed", "a train blows its horn as it speeds down the tracks "], "question": "which entity is louder", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "wind blows as people chatter quietly"], "sample_ids": ["sQGXqGcwOTc", "xBxDz0CFVn0"], "start_seconds": ["3", "30"], "properties": ["cling, speak, dishes", "wind, chatter, people"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage is blurry and out of focus"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "water flows followed by women screaming"], "sample_ids": ["vK93VuO0yNc", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["male voice, bus, rumble", "water, flow, women"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", null], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is followed by a soft male voice", "label": 0}, {"captions": ["a man woman speak while crickets sing", "a man speaks as a motor runs in the background"], 
"sample_ids": ["zTLVJCo4WEE", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["a, crickets, sing", "background, motor, run"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "a duck quacks continuously"], "sample_ids": ["uqFtmnhuqA8", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["a, b, c", "quacks, continuously, duck"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "wind blowing followed by a zoom"], "sample_ids": ["siJFXfGWgDk", "vr8ZXjEBhMQ"], "start_seconds": ["50", "150"], "properties": ["a, bird, vehicle", "wind, blow, zoom"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more like a natural phenomenon", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["y2bVZ7rz-5M", "wSVhSdj0F0"], "start_seconds": ["280", "10"], "properties": ["motor noise, horn, siren", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["footage of a parade of 
fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a car horn honks and keys jangle with background noise "], "question": "which entity has a horn honk and keys jingle?", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sQGXqGcwOTc", "uYT5gxnyMWM"], "start_seconds": ["3", "50"], "properties": ["cling, speak, dishes", "female, spraying, scream"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "an infant crying as a woman laughs"], "sample_ids": ["xl2PIWyXaM", "xhmRY9yhC7c"], "start_seconds": ["160", "20"], "properties": ["chirp, man, younger person", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["birds are chirping and people are talking", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["w34HjHr6gAY", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["beeps, squawk, child speaking", "two men, woman, birds"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard 
of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "water splashes and a door squeaks"], "sample_ids": ["sOa7g-44Dag", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["audio, scratching, man", "sound, splash, door"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", null], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a dog barks and taps with background noise "], "question": "which entity has a door squeaking?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "water splashes as an animal walks through"], "sample_ids": ["tOSWIURC-4", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["engine, work, nearby", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a lawn mower is running ", "water splashes and gurgles as people speak"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a clock ticktocks briefly", "water pouring and bubbling"], "sample_ids": ["u7C-AEBQM", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["ticktocks, clock, ticktocks briefly", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a ticktock of a clock", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vYkA3cfXp5Q", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["engine, accelerate, idle", "airplane, boy, fly"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["an engine is idling", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a moving object", "label": 1}, {"captions": ["birds coo incessantly", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yZrFNS7GFBQ", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["coo, bird, incessant", "loud, multiple, distance"], "captions_pred_video": ["of the bird in the cage", null], "captions_pred_audio": ["an owl hoots in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["water flows and trickles", "waves crash against a shoreline and people speak"], "sample_ids": ["tB7hWb9gTuQ", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["water, flow, trickle", "wave, crash, shoreline"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "footage of a person 
surfing in the ocean"], "captions_pred_audio": ["water is splashing and gurgling", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more calm", "label": 0}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sZPuqDgX2V0", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["engine, accelerate, intercom", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster crow?", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["zofjfKhqLk8", "s7knHCFW82w"], "start_seconds": ["10", "30"], "properties": ["background, metal, clings", "blow horn, get close, train"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a train is blowing its horn and its wheels are squealing "], "question": "which is a train", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tK4VlLsNxak", "w5W5Kqtc8E"], "start_seconds": ["120", "100"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "wind, blow, vehicle"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a motorboat is moving and people are 
shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "water rushes and then a vehicle zooms past"], "sample_ids": ["vqZuVbG6-HI", "s4Uz1Ffgo04"], "start_seconds": ["130", "100"], "properties": ["background, male, female", "water, rushes, vehicle"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is more active", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wSVhSdj0F0", "ukg5L09Wpvo"], "start_seconds": ["10", "150"], "properties": ["horn honks, keys jingle, electronic beep", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "continuous sneezing together with speech"], "sample_ids": ["vZAw4apG0Es", "x4dZyf9Gbj0"], "start_seconds": ["30", "130"], "properties": ["people, clock, converse", "continuous, sneeze, speech"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage is blurry and out of focus"], "captions_pred_audio": ["a clock is ticking and people are talking", "a woman sneezes and speaks"], "question": "which entity is more likely to be a recording", "label": 1}, {"captions": ["an aircraft engine runs", "music plays and someone speaks before 
gunfire and an explosion occurs"], "sample_ids": ["yLCORCnd35Q", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["engine, aircraft, runs", "music, gunfire, explosion"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a movie scene?", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "an insect buzzes around continuously"], "sample_ids": ["wTjoRj1se3U", "v25l1jef3JY"], "start_seconds": ["390", "0"], "properties": ["engine, run, people", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a jet engine is running and people are talking", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["wvKpEYswXO0", "tiDFTC-5vU"], "start_seconds": ["150", "30"], "properties": ["sound, water, running", "male, duck, laugh"], "captions_pred_video": ["of the person preparing food in the kitchen", null], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking as others laugh?", "label": 1}, {"captions": ["a woman and man speak while food is frying", "small dogs yip and bark sharply"], "sample_ids": ["zk-xJGQU8-4", "v-wcQf4BDY0"], "start_seconds": ["130", "120"], "properties": ["food, man, woman", "bark, yip, sharply"], "captions_pred_video": ["a man and a woman cooking in a wok on 
the stove", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sWZzXuWYY", "tiDFTC-5vU"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man is speaking and ducks are quacking"], "question": "which entity is more humorous", "label": 1}, {"captions": ["a helicopter engine runs continuously", "small dogs yip and bark sharply"], "sample_ids": ["ugHJF0hfYkg", "v-wcQf4BDY0"], "start_seconds": ["10", "120"], "properties": ["engine, running, continuously", "bark, yip, sharply"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a helicopter is flying overhead ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a dog barks and whimpers", "pigeons vocalize and birds chirp"], "sample_ids": ["sShpyu2l4YQ", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["barks, whimpers, dog", "vocalize, bird, chirp"], "captions_pred_video": ["the puppies are playing with a toy", "of the pigeon in the cage"], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["birds coo incessantly", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["yZrFNS7GFBQ", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["coo, bird, incessant", "loud, laughter, intermittent"], 
"captions_pred_video": ["of the bird in the cage", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["an owl hoots in the background ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a speedboat passes quickly on the water", "someone snores nearby"], "sample_ids": ["tjmoSi330GM", "spJCm8tD9Zo"], "start_seconds": ["23", "90"], "properties": ["speed, water, boat", "someone snores, nearby, someone"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a person is snoring loudly"], "question": "which entity is moving", "label": 0}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["s4Uz1Ffgo04", "uZesmtKZGSw"], "start_seconds": ["100", "250"], "properties": ["roars, background, people speaking", "men, talk, cars"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, 
{"captions": ["people speak in the background as a clock ticktocks", "a car accelerates and wind blows"], "sample_ids": ["vZAw4apG0Es", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["background, clock, ticktocks", "accelerates, wind, blows"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a horse runs while two women talk", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sdvI1mHAsc", "wz7N8YRy74I"], "start_seconds": ["20", "30"], "properties": ["two women, horse, run", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people", "label": 1}, {"captions": ["a person is snoring while sleeping", "a cat meows as a young woman speaks"], "sample_ids": ["vJrjSeP17yE", "x5cuQjOdM3E"], "start_seconds": ["40", "30"], "properties": ["a person is sleeping, snoring, person", "cat, meows, young woman"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a person snoring loudly", "a cat meows and a woman speaks"], "question": "which entity is a person", "label": 0}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["wSVhSdj0F0", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["horn honks, keys jingle, electronic beep", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], 
"captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a man woman speak while crickets sing", "three men talk while wind blows and some liquid flows"], "sample_ids": ["zTLVJCo4WEE", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["a, crickets, sing", "three men, wind, flow"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a man talks as several small engines run", "a horn rings out as a machine runs by"], "sample_ids": ["u9A6VZQCZpU", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["a, man, talk", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "a speedboat passes quickly on the water"], "sample_ids": ["ukg5L09Wpvo", "tjmoSi330GM"], "start_seconds": ["150", "23"], "properties": ["clickety-clack, train, whistle", "speed, water, boat"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a motorboat speeds through water with wind noise "], "question": "which is moving faster", "label": 0}, {"captions": ["a jet engine screams, then increases 
its power", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vBslzh7saPw", "tiDFTC-5vU"], "start_seconds": ["90", "30"], "properties": ["power, scream, increase", "male, duck, laugh"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "winds blows roughly as a vehicle races past"], "sample_ids": ["sapQIQUhFc", "xjvTpk2Zpr8"], "start_seconds": ["280", "70"], "properties": ["water, trickles, flow", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a jet engine roars and wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["y8WEcpOlT3I", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["harsh, wind, blows", "engine revs, vehicle, people"], "captions_pred_video": ["on how to use a sewing machine youtube", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tPJvjq9QePY", "sSMl2vc3ek"], "start_seconds": ["40", "20"], "properties": ["animal, bleat, moo", "loud, multiple, distance"], "captions_pred_video": ["a dog and a sheep in a barn", null], "captions_pred_audio": ["a baby cries and a man speaks", "a person snoring 
loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["vdoxuJn9lTc", "y2bVZ7rz-5M"], "start_seconds": ["40", "280"], "properties": ["burp, loud, girl", "motor noise, horn, siren"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a child speaks followed by a burp", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["a duck quacks several times", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vh30P49Po6s", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["quacks, duck, several", "female, spraying, scream"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["paper is crumpling consistently", "a woman talking as an infant is crying"], "sample_ids": ["v5cSxLaHADY", "tMbMDvT50j8"], "start_seconds": ["0", "12"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "a, talk, infant"], "captions_pred_video": ["footage of the person holding a pair of scissors", "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["paper is crumpled and crinkled", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["small dogs yip and bark sharply", "children speak and play together"], "sample_ids": ["v-wcQf4BDY0", "yVVP8XvWJTo"], "start_seconds": ["120", 
"260"], "properties": ["bark, yip, sharply", "children, speak, play"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "footage of a playground at a school or daycare center"], "captions_pred_audio": ["a dog barks and growls", "children are speaking and breathing with background noise "], "question": "which entity is more social", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "an airplane engine runs"], "sample_ids": ["tEE3MpBt1sg", "yVPZ2MNWpms"], "start_seconds": ["50", "0"], "properties": ["drill, something, laugh", "engine, airplane, runs"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a car is driving by on the road "], "question": "which entity is a machine", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vhJWZheqaE", "ukg5L09Wpvo"], "start_seconds": ["0", "150"], "properties": ["water drains unevenly, toilet flushes, water drains", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a toilet is flushed", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["people speak and tapping occurs", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tFCUUGdREgA", "tdWhHV3X25Q"], "start_seconds": ["70", "60"], "properties": ["people, tap, speak", "applause, audience, yells"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking 
and walking with wind noise in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["zkKdxzNC97Y", "sLUnaPT5gM8"], "start_seconds": ["27", "0"], "properties": ["hard, surface, door", "loud, laughter, intermittent"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a door is opened and closed", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["wRBHTgrbiwg", "zFjIWfSD-4"], "start_seconds": ["50", "410"], "properties": ["bird, owl, speak", "People, motor, brakes"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a man speaking briefly?", "label": 0}, {"captions": ["a loud snarling engine is followed by a man laughing", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zl9Dqx-j7q4", "wDVMhEdTiVw"], "start_seconds": ["6", "30"], "properties": ["engine, laugh, loud", "gun, shoot, water"], "captions_pred_video": ["footage of a man driving a car in the dark", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a jet engine roars ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is followed by water sloshing", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "a young woman speaks over spraying and another person yells"], 
"sample_ids": ["uoGVs9yUqY4", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["multiple, vocalize, wind", "person, spray, yell"], "captions_pred_video": ["for how to make a wooden shed door youtube", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "an infant crying as a woman laughs"], "sample_ids": ["xjhAnI2q6hM", "xhmRY9yhC7c"], "start_seconds": ["6", "20"], "properties": ["engine revs, vehicle, people", "a, laugh, infant"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a toilet flushes and water drains", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sfAvvZwdLCY", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["water drains, flushes, water", "music, gunfire, explosion"], "captions_pred_video": ["footage of the toilet in the bathroom", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a toilet is flushed", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a scene of a toilet flushing and water draining?", "label": 0}, {"captions": ["a kid speaks followed by music playing", "dishes cling together then a man begins to speak"], "sample_ids": ["tQWGZLItBXk", "sQGXqGcwOTc"], "start_seconds": ["170", "3"], "properties": ["music, kid, speak", "cling, speak, dishes"], "captions_pred_video": ["worms revolution screenshots", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": 
["a child speaks music plays video game sounds sound effects and sound effects play ", "mechanisms are operating and water is splashing "], "question": "which entity is about a kid speaking?", "label": 0}, {"captions": ["a railroad crossing bell rings as a train horn blows", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tZGN5a7ybxo", "xfaoyyzw2WU"], "start_seconds": ["60", "180"], "properties": ["ring, train, horn", "loud, jet engine, roar"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a train is moving and blowing its horn ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["yFB25fqfU8I", "yDoT73BWsdA"], "start_seconds": ["300", "10"], "properties": ["wave, crash, shoreline", "engine, revs, vehicle"], "captions_pred_video": ["footage of a person surfing in the ocean", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "pigeons vocalize and birds chirp"], "sample_ids": ["wnpJndXuxLc", "uiS58TNyUiw"], "start_seconds": ["50", "430"], "properties": ["blows, vehicle, train", "vocalize, bird, chirp"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "of the pigeon in the cage"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "rustling occurs, 
ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["y1saVTXsKwc", "vfYTJq7nU"], "start_seconds": ["80", "130"], "properties": ["a, dog, talk", "rustling, ducks, quack"], "captions_pred_video": ["a dog playing with a pink ball", null], "captions_pred_audio": ["a dog barks and a man speaks", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 0}, {"captions": ["a person is burping then speaks and laughs", "a duck quacks loudly and continuously"], "sample_ids": ["wAAkbZToh8", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["burp, laugh, speak", "loud, continuous, quacks"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man burps and a woman speaks", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["heavy rain splashes as it falls", "a series of light horn beeps is followed by a loud steam whistle"], "sample_ids": ["wP8ZKrlx3oA", "wnpJndXuxLc"], "start_seconds": ["40", "50"], "properties": ["fall, rain, splash", "beeps, loud, whistle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "a group of people chatter and talk as multiple horns honk in the background"], "sample_ids": ["tPJvjq9QePY", "yLy-WycbVVE"], "start_seconds": ["40", "30"], "properties": ["animal, bleat, moo", "background, people, talk"], "captions_pred_video": ["a dog and a sheep in a barn", "a soccer field in a stadium with yellow and red seats"], "captions_pred_audio": ["a baby cries 
and a man speaks", "a man is speaking and a church bell is ringing with wind noise in the background "], "question": "which entity is talking", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "wind blows as people chatter quietly"], "sample_ids": ["vJ7JPEFhyLA", "xBxDz0CFVn0"], "start_seconds": ["16", "30"], "properties": ["three men, wind, flow", "wind, chatter, people"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yFB25fqfU8I", "uEU-Hg5MTN8"], "start_seconds": ["300", "27"], "properties": ["wave, crash, shoreline", "a woman, laughs, animal"], "captions_pred_video": ["footage of a person surfing in the ocean", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a stream of water runs briefly"], "sample_ids": ["x5cuQjOdM3E", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["cat, meows, young woman", "stream, water, run"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a cat meows and a woman speaks", "a car is driving on a wet road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["an engine runs and wind blows", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vs65y4qmyBE", "yDoT73BWsdA"], 
"start_seconds": ["340", "10"], "properties": ["engine, run, wind", "engine, revs, vehicle"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a child speaks in closed space"], "sample_ids": ["zl9Dqx-j7q4", "yW6FWLSLkx4"], "start_seconds": ["6", "40"], "properties": ["motors rev, laugh, loudly", "child, space, speak"], "captions_pred_video": ["footage of a man driving a car in the dark", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a jet engine roars ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wwyfGO2J4", "yajyRTUQk3U"], "start_seconds": ["90", "400"], "properties": ["people, applaud, hoot", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a woman is speaking while food is frying in the background"], "question": "which entity is about food?", "label": 1}, {"captions": ["a man talks while vehicles pass by", "some men converse over an engine running"], "sample_ids": ["sK4u5T8hW78", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["a, man, talk", "men, converse, engine"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows a man talking while vehicles pass by?", "label": 0}, {"captions": ["wind blows and people scream while an engine revs", "paper is crumpling consistently"], "sample_ids": ["w5W5Kqtc8E", "v5cSxLaHADY"], "start_seconds": ["100", "0"], "properties": ["wind, engine, scream", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "paper is crumpled and crinkled"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["yLy-WycbVVE", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["background, people, talk", "male, duck, laugh"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", null], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a man is speaking and ducks are quacking"], "question": "which entity is more likely to be a joke", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "a woman speaks as she rubs two objects together"], "sample_ids": ["slZLHwNbbt4", "vzxHnu-SFEw"], "start_seconds": ["300", "80"], "properties": ["clap, distance, horn", "two objects, woman, speak"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "how to make a paper cup with scissors youtube how 
to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["xfudFO976zE", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["animal, bleats, cry", "animal, grunts, snorts"], "captions_pred_video": ["footage is blurry and shaky", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a woman is speaking and a baby is crying"], "question": "which animal is grunting and snorting", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "someone is typing on a computer keyboard"], "sample_ids": ["xyL9F5VrjkE", "v0x1odnXtP0"], "start_seconds": ["20", "210"], "properties": ["wind, blows, vehicle", "keyboard, type, computer"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "how to make money on youtube 
in spanish"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a person is typing on a keyboard"], "question": "which is not a vehicle", "label": 1}, {"captions": ["people speak as gunfire rings out", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wqTCwqVRDlk", "xKB8O8LTs6s"], "start_seconds": ["80", "70"], "properties": ["gunfire, ring, speak", "music, gunfire, explosion"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and a gun is fired", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a child speaks", "a toilet flushes and water drains"], "sample_ids": ["yW6FWLSLkx4", "sfAvvZwdLCY"], "start_seconds": ["40", "20"], "properties": ["a, child, speaks", "water drains, flushes, water"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "water flows as men speak and yell"], "sample_ids": ["s4Uz1Ffgo04", "vJ7JPEFhyLA"], "start_seconds": ["100", "16"], "properties": ["roars, background, people speaking", "water, flow, men"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motor idles, accelerates, then slows 
down.", "a propeller moves loudly nearby"], "sample_ids": ["vYkA3cfXp5Q", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["speed, idle, accelerate", "loud, propeller, move"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["an engine is idling", "a helicopter is flying overhead "], "question": "which entity is moving at a constant speed", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wqADXCzngMw", "ukg5L09Wpvo"], "start_seconds": ["340", "150"], "properties": ["engine, idle, man", "clickety-clack, train, whistle"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a train blows its whistle and blows its horn "], "question": "which train is going to be moving?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a man speaks as a car is passing by"], "sample_ids": ["ugHJF0hfYkg", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["engine, running, continuously", "a, car, pass"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking with background noise and breathing sounds "], 
"question": "which entity is stationary", "label": 1}, {"captions": ["birds vocalize and a man speaks", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["v0wPrLBI3hg", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["vocalize, bird, speak", "a train, a horn, a bell"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a train blows its whistle and blows its horn "], "question": "which entity is a warning", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vs65y4qmyBE", "w5W5Kqtc8E"], "start_seconds": ["340", "100"], "properties": ["wind, blows, strongly", "wind, blow, vehicle"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blows?", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "a telephone rings followed by a woman talking"], "sample_ids": ["vZAw4apG0Es", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["people, clock, converse", "ring, talk, woman"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation between two people?", "label": 0}, {"captions": ["paper folding and crinkling", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zPpG3RD8lSs", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["paper, fold, crinkle", "music, 
gunfire, explosion"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be a movie", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a steam engine runs and whistles as it passes by"], "sample_ids": ["zY3icUyMdh8", "se87d6yxEOA"], "start_seconds": ["20", "10"], "properties": ["dog, bark, engine", "run, whistle, pass"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of a train passing by a train station with smoke billowing out of the train's smokestack"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a train is moving and blowing its whistle "], "question": "which entity is a steam engine?", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["yJ0TePmaOo", "su6FAOcOA8c"], "start_seconds": ["390", "4"], "properties": ["two 
hard objects, man, speak", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a woman is speaking and a subway train is moving "], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["a small engine idles continuously", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["y5WII6cTH7k", "yDoT73BWsdA"], "start_seconds": ["40", "10"], "properties": ["engine, idle, continuously", "engine, revs, vehicle"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a race car accelerates and revs its engine "], "question": "which engine is revving", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "a man talks as several small engines run"], "sample_ids": ["uiS58TNyUiw", "u9A6VZQCZpU"], "start_seconds": ["430", "30"], "properties": ["vocalize, bird, chirp", "a, man, talk"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a man is speaking while a race car is revving and accelerating "], "question": "which entity is talking", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a stream of water runs briefly"], "sample_ids": ["vddP56-ogds", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["water, splash, person, laugh", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "an insect 
buzzes around continuously"], "sample_ids": ["wwyfGO2J4", "v25l1jef3JY"], "start_seconds": ["90", "0"], "properties": ["people, applaud, hoot", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["people speak in a closed space", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["sTpirNYo8vQ", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["people, space, speak", "animal, grunts, snorts"], "captions_pred_video": ["of a man taking a selfie on a bus", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["an airplane engine spools and people speak", "the revving of an engine throttle followed by a man speaking"], "sample_ids": ["wTjoRj1se3U", "tezvROoo4bs"], "start_seconds": ["390", "40"], "properties": ["airplane, engine, spool", "audio, throttle, speaking"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of a busy city street with cars parked on both sides of the road"], "captions_pred_audio": ["a jet engine is running and people are talking", "a car accelerates and revs while a man speaks "], "question": "which entity is about an airplane engine?", "label": 0}, {"captions": ["young female child snoring and breathing deeply", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sAam2NqGhLY", "vfYTJq7nU"], "start_seconds": ["20", "130"], "properties": ["snoring, breathing, child", "rustling, ducks, quack"], 
"captions_pred_video": ["of a little girl sleeping on a couch", null], "captions_pred_audio": ["a person is snoring", "a duck quacks and a woman speaks"], "question": "which entity is a video of a child?", "label": 0}, {"captions": ["a man speaks then multiple motorcycles pass by", "a telephone rings followed by a woman talking"], "sample_ids": ["zcDwZ6W7E3E", "tGcFnX0GHI"], "start_seconds": ["180", "0"], "properties": ["a, man, speak", "ring, talk, woman"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "a train horn blows as it passes by"], "sample_ids": ["zCrAfDfv6-A", "zVacuqSb4LI"], "start_seconds": ["30", "30"], "properties": ["person, mouse, click", "horn, blows, train"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a person whistles a song", "a train whistle blows and a train passes by with a whistle blowing "], 
"question": "which entity is louder", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["wfHeoPDLMaM", "wyllXV6PjKo"], "start_seconds": ["30", "30"], "properties": ["quacking, squawking, ducks", "a baby, a woman, a man"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "a woman speaks and a baby cries"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as a machine runs", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vD6lYD1l0BY", "uYT5gxnyMWM"], "start_seconds": ["330", "50"], "properties": ["a, machine, run", "a, scream, girl"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["water pouring and bubbling", "an audience gives applause as a man yells and a group sings"], "sample_ids": 
["uyRfq-jKPpo", "tdWhHV3X25Q"], "start_seconds": ["50", "60"], "properties": ["water, bubbles, pouring", "applause, audience, yells"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["water is running from a faucet", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "a child speaks in closed space"], "sample_ids": ["se87d6yxEOA", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["run, whistle, pass", "child, space, speak"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["s6DESzUTGjY", "w5W5Kqtc8E"], "start_seconds": ["16", "100"], "properties": ["wind, laugh, woman", "wind, 
blow, vehicle"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", null], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blows before women yell?", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "multiple people speak and children yell while water gurgles"], "sample_ids": ["xyL9F5VrjkE", "vb1fPSDI4c"], "start_seconds": ["20", "30"], "properties": ["wind, motor, distance", "multiple, people, yell"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["children speak as a female ask them questions", "people cheer as a vehicle engine revs"], "sample_ids": ["wEBlkGWVWwE", "xjhAnI2q6hM"], "start_seconds": ["260", "6"], "properties": ["female, speak, questions", "engine revs, vehicle, people"], "captions_pred_video": ["shows a person writing on the whiteboard", "a school bus 
decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "winds blows roughly as a vehicle races past"], "sample_ids": ["sZvwOuuPGP0", "xjvTpk2Zpr8"], "start_seconds": ["50", "70"], "properties": ["engine, diesel, truck", "wind, blows, vehicle"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a medium engine is running ", "a jet engine roars and wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["uYT5gxnyMWM", "w34HjHr6gAY"], "start_seconds": ["50", "30"], "properties": ["person, spray, yell", "beeps, hit, woman"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "paper is crumpling consistently"], "sample_ids": ["wRV8yMk886E", 
"v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["liquid, spray, nozzle", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["two cars are parked in a parking lot at night", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man speaks followed by a loud burst", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a horn rings out as a machine runs by"], "sample_ids": ["u2f5NpsoHBg", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["person, laugh, clap", "a, horn, run"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["s3cTDAj31g", "vlS6YMeWAPo"], "start_seconds": ["80", "40"], "properties": ["man, talk, woman", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["zuua6-5goWw", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["birds, chirp, quiet, man, speaks", "two objects, woman, speak"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 
10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a beep repeats multiple times", "a horn honks twice and keys jingle, followed by a slam and an electronic beep"], "sample_ids": ["y682ml90jGw", "wSVhSdj0F0"], "start_seconds": ["11", "10"], "properties": ["beep, repeat, multiple", "horn honks, keys jingle, slam"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a car horn honks and keys jangle with background noise "], "question": "which entity has a slam?", 
"label": 1}, {"captions": ["a woman and man are speaking", "someone is typing on a computer keyboard"], "sample_ids": ["vbpKkWvfOu4", "v0x1odnXtP0"], "start_seconds": ["560", "210"], "properties": ["two people, speaking, woman, man", "keyboard, type, computer"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["some men converse over an engine running", "a clock ticktocks briefly"], "sample_ids": ["sCiy7QS1U", "u7C-AEBQM"], "start_seconds": ["300", "30"], "properties": ["men, converse, engine", "ticktocks, clock, ticktocks briefly"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a ticktock of a clock"], "question": "which entity is a clock?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vimzuGQvdcU", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["a, man, yells", "three men, wind, flow"], "captions_pred_video": ["a group of people are rafting down a river", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "a duck quacks continuously"], "sample_ids": ["tjmoSi330GM", "vh30P49Po6s"], "start_seconds": ["23", "30"], "properties": ["speed, water, boat", "quacks, 
continuously, duck"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "three men talk while wind blows and some liquid flows"], "sample_ids": ["xjvTpk2Zpr8", "vJ7JPEFhyLA"], "start_seconds": ["70", "16"], "properties": ["wind, blows, vehicle", "three men, wind, flow"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a vehicle racing past?", "label": 0}, {"captions": ["a person speaks briefly", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["zOZleIRqZm4", "wSVhSdj0F0"], "start_seconds": ["80", "10"], "properties": ["person, talk, brief", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a car horn honks and keys jangle with background noise "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "frogs croak and vocalize"], "sample_ids": ["uOpoD0gGXcs", "yswmmRZFItk"], "start_seconds": ["120", "0"], "properties": ["chirps, woman, bird", "croak, vocalize, frog"], "captions_pred_video": ["a herd of cows grazing in the field", "a close up of a frog in the water"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a frog is croaking"], "question": "which animal is vocalizing", "label": 1}, {"captions": 
["wind blows in gusts as a woman speaks in the distance", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["uC9dtII1KDI", "sLUnaPT5gM8"], "start_seconds": ["150", "0"], "properties": ["wind, gusts, distance", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an engine revs and a turning noise is made", "a person is whistling a tune"], "sample_ids": ["tOSWIURC-4", "scYRUkrFLiQ"], "start_seconds": ["0", "30"], "properties": ["noise, engine, revs", "a, tune, whistle"], "captions_pred_video": [null, "of the man wearing a bow tie and a suit jacket in front of a red door"], "captions_pred_audio": ["a lawn mower is running ", "a person whistling a song"], "question": "which entity is not a noise", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "several insects fly while two men talk"], "sample_ids": ["xfaoyyzw2WU", "s-T9OVOiMLo"], "start_seconds": ["180", "330"], "properties": ["loud, jet engine, roar", "several, fly, men"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a man is speaking while insects are buzzing in the background "], "question": "which is not a man", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a man speaks as a motor runs in the background"], "sample_ids": ["wtDqrBygTcU", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["man, engine, run", "background, motor, run"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds 
through the water", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and a motor is running", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a boat?", "label": 0}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zkKdxzNC97Y", "wDVMhEdTiVw"], "start_seconds": ["27", "30"], "properties": ["loud, bang, noise", "gun, shoot, water"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a door is opened and closed", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity has a loud bang followed by a softer banging noise?", "label": 0}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["weDbePuc-Xc", "zl9Dqx-j7q4"], "start_seconds": ["40", "6"], "properties": ["cartoon character, music, vocalize", "engine, laugh, loud"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "an airplane engine runs"], "sample_ids": ["se87d6yxEOA", "yVPZ2MNWpms"], "start_seconds": ["10", "0"], "properties": ["run, whistle, pass", "engine, airplane, runs"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a train is moving and 
blowing its whistle ", "a car is driving by on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a mechanical buzzing getting louder"], "sample_ids": ["y2ZBGpgbhHM", "sEprKHm8Sj8"], "start_seconds": ["30", "90"], "properties": ["birds, tweet, pant", "noise, loud, buzzing"], "captions_pred_video": [null, "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["birds chirping and a dog panting", "a race car accelerates and revs its engine "], "question": "which entity is a noise", "label": 1}, {"captions": ["some people speak", "a goat bleats and someone makes a calling noise"], "sample_ids": ["vbZ-0lGPneg", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "noise, bleat, call"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a goat bleats and birds chirp"], "question": "which entity is a noise", "label": 1}, {"captions": ["a person speaks over rustling leaves", "loud clanking and banging with brief male speech"], "sample_ids": ["zOZleIRqZm4", "sWZzXuWYY"], "start_seconds": ["80", "420"], "properties": ["rustling, leaves, person", "male, speech, banging"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a sewing machine runs and a man speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "a girl 
speaks followed by a scream and more girls talking"], "sample_ids": ["tQWGZLItBXk", "uYT5gxnyMWM"], "start_seconds": ["170", "50"], "properties": ["music, person, ding", "a, scream, girl"], "captions_pred_video": ["worms revolution screenshots", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "a car speeding up in the distance"], "sample_ids": ["wTideSjRFS0", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["food, sizzle, woman", "distance, car, speed"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "an animal bleats and cries out and metal bangs"], "sample_ids": ["sYITalLZjj4", "xfudFO976zE"], "start_seconds": ["30", "0"], "properties": ["water, rushes, background, birds", "animal, bleats, cry"], "captions_pred_video": ["two ducks are swimming in the water near each other", "footage is blurry and shaky"], "captions_pred_audio": ["wind blows and birds chirp", "a goat bleats and birds chirp in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an airplane engine roars increasingly louder", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vBslzh7saPw", "tDVADusiIoc"], "start_seconds": ["90", "60"], "properties": ["engine, roar, louder", "water, radio, man"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a person riding on the back of a sailboat in rough seas"], 
"captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "wind blows as people chatter quietly"], "sample_ids": ["zY3icUyMdh8", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "wind, chatter, people"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage is blurry and out of focus"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a person is whistling", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sIXTftIuUgw", "zFjIWfSD-4"], "start_seconds": ["90", "410"], "properties": ["person, whistling, person", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person whistling a song", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 0}, {"captions": ["a vehicle engine revs and tires squeal", "a drill drills through something then people begin laughing"], "sample_ids": ["yDoT73BWsdA", "tEE3MpBt1sg"], "start_seconds": ["10", "50"], "properties": ["engine revs, tires squeal, vehicle", "drill, something, laugh"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["a train horn sounds as a railroad passing bell rings", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult 
male speaking and duck calls being blown"], "sample_ids": ["zgUgkpk78xU", "vfYTJq7nU"], "start_seconds": ["70", "130"], "properties": ["horn, bell, train", "rustling, ducks, quack"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a duck quacks and a woman speaks"], "question": "which entity is about a train?", "label": 0}, {"captions": ["a loud snarling engine is followed by a man laughing", "a person sneezes followed by another person speaking"], "sample_ids": ["zl9Dqx-j7q4", "t8CV69hcvF0"], "start_seconds": ["6", "210"], "properties": ["engine, laugh, loud", "person, sneeze, follow"], "captions_pred_video": ["footage of a man driving a car in the dark", "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a jet engine roars ", "a woman sneezes and speaks"], "question": "which entity is followed by a person speaking", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a man speaks as a car is passing by"], "sample_ids": ["yRx9txMcBl0", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["accelerates, tires, squeals", "a, car, pass"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods 
cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["heavy rain splashes as it falls", "water flows as men speak and yell"], "sample_ids": ["wP8ZKrlx3oA", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["fall, rain, splash", "water, flow, men"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["rqfQRErjfk8", "ukg5L09Wpvo"], "start_seconds": ["170", "150"], "properties": ["crowd, cheers, applauds", "clickety-clack, train, whistle"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a train blows its whistle and blows its horn "], "question": "which entity is quieter", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a young woman speaks and laughs 
and an animal snorts"], "sample_ids": ["wyllXV6PjKo", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["a baby, a woman, a man", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman is speaking and a baby is crying"], "question": "which entity has a baby?", "label": 0}, {"captions": ["an airplane flies overhead as a woman speaks", "a toilet flushes and a female speaks"], "sample_ids": ["zj2R0XoFr5k", "yaln9y8I7ms"], "start_seconds": ["50", "230"], "properties": ["airplane, fly, overhead", "female, flushes, toilet"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a toilet flushes and a man speaks"], "question": "which entity is a woman speaking?", "label": 0}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a man speaks as a motor runs in the background"], "sample_ids": ["weDbePuc-Xc", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["music, slaps, human", "background, motor, run"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks as a machine runs", "an infant crying frantically"], "sample_ids": ["vD6lYD1l0BY", "zwOBqeFTgiU"], "start_seconds": ["330", "30"], "properties": ["a, machine, run", "cry, infant, frantically"], "captions_pred_video": ["game controller being held in the hands of the person", "of 
the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["multiple ducks quack continuously", "paper is crumpling consistently"], "sample_ids": ["wfHeoPDLMaM", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["multiple, quack, continuously", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["ducks are quacking", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "a infant makes noise and is excited"], "sample_ids": ["zl9Dqx-j7q4", "wIJK3-5y0kA"], "start_seconds": ["6", "30"], "properties": ["engine, laugh, loud", "noise, excited, infant"], "captions_pred_video": ["footage of a man driving a car in the dark", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a jet engine roars ", "a baby cries and a woman speaks"], "question": "which entity is making 
noise", "label": 1}, {"captions": ["a car accelerates and wind blows", "an engine sputters followed by a car zooming by"], "sample_ids": ["u0TrcHhkPQ", "u5RmF3c3Aw"], "start_seconds": ["20", "60"], "properties": ["accelerates, wind, blows", "engine, car, zoom"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a race car accelerates and skids with wind noise in the background "], "question": "which car is zooming by", "label": 1}, {"captions": ["a woman speaks and dog vocalizes", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["uWAAAL4CIoc", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["a, dog, vocalize", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["rwTERCUno", "xfaoyyzw2WU"], "start_seconds": ["90", "180"], "properties": ["engine, idle, sputter", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["an engine is idling and vibrating", "an aircraft engine roars and a man speaks "], "question": "which engine is louder", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "water rushes and then a vehicle zooms past"], "sample_ids": ["v0x1odnXtP0", "s4Uz1Ffgo04"], "start_seconds": ["210", "100"], "properties": ["keyboard, type, computer", "water, rushes, vehicle"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a person is typing on a keyboard", "a man is speaking 
while a boat is moving and wind is blowing "], "question": "which entity is more likely to be seen in a movie", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a woman speaks and is crumpling paper"], "sample_ids": ["zgUgkpk78xU", "xvDdE3zNf8Y"], "start_seconds": ["70", "120"], "properties": ["clinking, humming, horn", "A, crumple, paper"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman speaks and crumples paper"], "question": "which entity is crumpling paper", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a duck quacks continuously"], "sample_ids": ["tOSWIURC-4", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["engine, work, nearby", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a lawn mower is running ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "a stream of water runs briefly"], "sample_ids": ["xO-Q2BlIIPU", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["two men, exclamation, speak", "stream, water, run"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a car is driving on a wet road "], "question": "which 
entity is a stream of water?", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "multiple people speak and children yell while water gurgles"], "sample_ids": ["uEU-Hg5MTN8", "vb1fPSDI4c"], "start_seconds": ["27", "30"], "properties": ["animal, grunts, snorts", "multiple, people, yell"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vJ7JPEFhyLA", "zl9Dqx-j7q4"], "start_seconds": ["16", "6"], "properties": ["three men, wind, flow", "engine, laugh, loud"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "birds chirp as a man speaks and a younger person speaks"], "sample_ids": ["w2M4i1mklOA", "xl2PIWyXaM"], "start_seconds": ["30", "160"], "properties": ["loud, chime, bell", "chirp, man, younger person"], "captions_pred_video": ["footage of an antique clock", null], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "birds are chirping and people are talking"], "question": "which entity is quieter", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["xERFUeZONz8", "w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["ring, approach, traffic", "wind, blow, vehicle"], "captions_pred_video": ["footage is blurry due to camera shake or motion 
blur", null], "captions_pred_audio": ["an emergency vehicle siren blares", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "small dogs yip and bark sharply"], "sample_ids": ["wztCSUxOf8", "v-wcQf4BDY0"], "start_seconds": ["130", "120"], "properties": ["a crowd, yells, applauds", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a dog barks and growls"], "question": "which entity is more likely to be a group of people", "label": 0}, {"captions": ["a sleeping person snores and wheezes", "a woman speaks as she rubs two objects together"], "sample_ids": ["spJCm8tD9Zo", "vzxHnu-SFEw"], "start_seconds": ["90", "80"], "properties": ["snores, wheezes, sleeps", "two objects, woman, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a person is snoring loudly", "a woman is 
speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 0}, {"captions": ["continuous sneezing together with speech", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["x4dZyf9Gbj0", "t25U-v4k4ts"], "start_seconds": ["130", "40"], "properties": ["continuous, sneeze, speech", "a, chirps, bird"], "captions_pred_video": ["footage is blurry and out of focus", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking and bees are buzzing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["w6RTHR6AeAg", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["call, owl, screech", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a duck quacks and a woman speaks"], "question": "which entity is a bird?", "label": 0}, {"captions": ["birds chirp, a woman speaks, and insects buzz", "a man speaks as a motor runs in the background"], "sample_ids": ["t97k0cejSQE", "xZepNM9qcRA"], "start_seconds": ["250", "30"], "properties": ["sound, chirp, buzz", "background, motor, run"], "captions_pred_video": ["a bee on a purple thistle flower", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wTideSjRFS0", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["food, sizzle, woman", "loud, multiple, 
distance"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tIY7qOV3rEM", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "two men, woman, birds"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more animals", "label": 0}, {"captions": ["a horn blasts as warning bells ring", "a child speaks in closed space"], "sample_ids": ["zgUgkpk78xU", "yW6FWLSLkx4"], "start_seconds": ["70", "40"], "properties": ["horn, bells, ring", "child, space, speak"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["an engine runs and wind blows", "a man speaks followed by another man speaking outside"], "sample_ids": ["vs65y4qmyBE", "viuTg1M-dqg"], "start_seconds": ["340", "30"], "properties": ["engine, run, wind", "two men, speak, follow"], "captions_pred_video": ["a 
car is engulfed in flames on the side of the road", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["a baby cries and a woman speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tMbMDvT50j8", "vfYTJq7nU"], "start_seconds": ["12", "130"], "properties": ["a, cry, woman", "rustling, ducks, quack"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a duck quacks and a woman speaks"], "question": "which entity is about a baby?", "label": 0}, {"captions": ["a woman and man speak while food is frying", "a vehicle engine revs and tires squeal"], "sample_ids": ["zk-xJGQU8-4", "yDoT73BWsdA"], "start_seconds": ["130", "10"], "properties": ["food, man, woman", "engine revs, tires squeal, vehicle"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a church bell rings several times", "several insects fly while two men talk"], "sample_ids": ["sUVVjE3Ucp8", "s-T9OVOiMLo"], "start_seconds": ["0", "330"], "properties": ["ring, bell, several", "several, fly, men"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a church bell is 
ringing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a large engine roars and a strong wind blows", "a sputtering motor idles roughly"], "sample_ids": ["snFy48Lv3r8", "rwTERCUno"], "start_seconds": ["30", "90"], "properties": ["engine, roar, wind", "a, idle, motor"], "captions_pred_video": [null, null], "captions_pred_audio": ["the wind is blowing and water is splashing", "an engine is idling and vibrating"], "question": "which entity is quieter", "label": 1}, {"captions": ["a stream of water flows quickly", "a woman speaks as she rubs two objects together"], "sample_ids": ["wbHTKEJZyhc", "vzxHnu-SFEw"], "start_seconds": ["20", "80"], "properties": ["stream, water, flow", "two objects, woman, speak"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a physical action", "label": 1}, {"captions": ["a child speaks", "small dogs yip and bark sharply"], "sample_ids": ["yW6FWLSLkx4", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["a, child, speaks", "bark, yip, sharply"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks followed by another man speaking outside", "birds chirp and an owl hoots before a man speaks briefly"], "sample_ids": ["viuTg1M-dqg", "wRBHTgrbiwg"], "start_seconds": ["30", "50"], "properties": ["two men, speak, follow", "bird, owl, speak"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "of a bee 
pollinating the flowers in the field"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "birds are chirping and insects are buzzing"], "question": "which entity has more animals speaking", "label": 1}, {"captions": ["a person sniffs and sneezes", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["uRlbY6aoBU", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["sneezes, person, sniffs", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is sneezing ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["uWPRNLnpy7Y", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["accelerate, laugh, vehicle", "animal, grunts, snorts"], "captions_pred_video": ["is taken from a car driving down the street", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vzceMbklWc", "w5W5Kqtc8E"], "start_seconds": ["180", "100"], "properties": ["water, faucet, sink", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and a man is speaking", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["an emergency siren wails as it passes", "some men converse over an engine running"], "sample_ids": ["vGj1XLJvNrw", "sCiy7QS1U"], "start_seconds": ["0", "300"], 
"properties": ["wails, wails, pass", "men, converse, engine"], "captions_pred_video": ["footage of a police car driving down a city street", null], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["y8WEcpOlT3I", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["wind, speak, buffeting", "a woman, something, fried"], "captions_pred_video": ["on how to use a sewing machine youtube", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tQWGZLItBXk", "zl9Dqx-j7q4"], "start_seconds": ["170", "6"], "properties": ["music, kid, speak", "engine, laugh, loud"], "captions_pred_video": ["worms revolution screenshots", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a woman speaks as she rubs two objects together"], "sample_ids": ["ylpYOorfH4o", "vzxHnu-SFEw"], "start_seconds": ["410", "80"], "properties": ["motor, run, steady", "two objects, woman, speak"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 
1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a woman is speaking and breathing with mechanisms in the background "], "question": "which object is being rubbed together?", "label": 1}, {"captions": ["a person speaks over rustling leaves", "water splashes as an animal walks through"], "sample_ids": ["zOZleIRqZm4", "w1ir-sZ3Im8"], "start_seconds": ["80", "90"], "properties": ["rustling, leaves, person", "animal, water, splashes"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "water splashes and gurgles as people speak"], 
"question": "which entity is more active", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a stream of water flows as people talk and wind blows"], "sample_ids": ["zY3icUyMdh8", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "stream, water, flow"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage is blurry and out of focus"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "an infant crying as a woman laughs"], "sample_ids": ["x6ijhqRY38s", "xhmRY9yhC7c"], "start_seconds": ["250", "20"], "properties": ["something metal, glass, hit", "a, laugh, infant"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman sneezes then speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["x4dZyf9Gbj0", "zl9Dqx-j7q4"], "start_seconds": ["130", "6"], "properties": ["sneezes, speaks, woman", "engine, laugh, loud"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman sneezes and speaks", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "an infant crying as a woman laughs"], "sample_ids": ["vVhthZ45k3Y", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["cat, purr, hiss", "a, laugh, infant"], "captions_pred_video": ["footage is blurry and 
out of focus", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["an audience gives applause", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["x6iCUDmRpKQ", "tiDFTC-5vU"], "start_seconds": ["38", "30"], "properties": ["applause, audience, give", "male, duck, laugh"], "captions_pred_video": ["a black background with the moon and stars in the sky", null], "captions_pred_audio": ["a group of people are clapping and cheering", "a man is speaking and ducks are quacking"], "question": "which is not a person", "label": 1}, {"captions": ["an engine revs and a turning noise is made", "a train horn blows as it passes by"], "sample_ids": ["tOSWIURC-4", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["noise, engine, revs", "horn, blows, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a lawn mower is running ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which train is making noise", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a kid speaks followed by 
music playing"], "sample_ids": ["rwtmaKiCcQU", "tQWGZLItBXk"], "start_seconds": ["30", "170"], "properties": ["nozzle, depressed, spray can", "music, kid, speak"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "worms revolution screenshots"], "captions_pred_audio": ["spraying and people speaking", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity has a nozzle depressed", "label": 0}, {"captions": ["a car accelerates and wind blows", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["u0TrcHhkPQ", "tdWhHV3X25Q"], "start_seconds": ["20", "60"], "properties": ["accelerates, wind, blows", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["xl2PIWyXaM", "wDVMhEdTiVw"], "start_seconds": ["160", "30"], "properties": ["chirp, man, younger person", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["birds are chirping and people are talking", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wTjoRj1se3U", "xfaoyyzw2WU"], "start_seconds": ["390", "180"], "properties": ["engine, run, people", "loud, jet engine, roar"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of an airplane on the tarmac at an airport"], 
"captions_pred_audio": ["a jet engine is running and people are talking", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "water flows as men speak and yell"], "sample_ids": ["uZesmtKZGSw", "vJ7JPEFhyLA"], "start_seconds": ["250", "16"], "properties": ["men, talk, cars", "water, flow, men"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["bees buzz and wind blows", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tMJne1a4AFI", "xfaoyyzw2WU"], "start_seconds": ["0", "180"], "properties": ["bees buzz, wind blows, bees", "loud, jet engine, roar"], "captions_pred_video": ["a swarm of bees on the ground", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a swarm of bees buzzing around", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "three men talk while wind blows and some liquid flows"], "sample_ids": ["tDVADusiIoc", "vJ7JPEFhyLA"], "start_seconds": ["60", "16"], 
"properties": ["water, radio, man", "three men, wind, flow"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["wRBHTgrbiwg", "vlS6YMeWAPo"], "start_seconds": ["50", "40"], "properties": ["birds, chirp, cooing", "sheep, baa, birds"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a goat bleats and birds chirp"], "question": "which entity is a sheep?", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a man speaks as a motor runs in the background"], "sample_ids": ["vW4x7S1VfQc", "xZepNM9qcRA"], "start_seconds": ["150", "30"], "properties": ["clacking, oil, woman", "background, motor, run"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["food sizzles in a frying pan", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a train horn sounds as a railroad passing bell rings", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["zgUgkpk78xU", "x9JovgqUcs"], "start_seconds": ["70", "500"], "properties": ["horn, bell, train", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": ["of a train passing through a small 
town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man speaks and types on a keyboard"], "question": "which entity is a person", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vms5XGTDVQc", "w5W5Kqtc8E"], "start_seconds": ["220", "100"], "properties": ["paper, crumpled, crinkled", "wind, blow, vehicle"], "captions_pred_video": ["footage of a woman opening a black bag on a table", null], "captions_pred_audio": ["paper is crumpled and crinkled", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["wind blows and people scream while an engine revs", "sirens ring and approach with humming of distant traffic"], "sample_ids": ["w5W5Kqtc8E", "xERFUeZONz8"], "start_seconds": ["100", "0"], "properties": ["wind, engine, scream", "ring, approach, traffic"], "captions_pred_video": [null, "footage is blurry due to camera shake or motion blur"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "an emergency vehicle siren blares"], "question": "which entity is a warning", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "heavy rain splashes as it falls"], "sample_ids": ["u21-Z5gJCB8", "wP8ZKrlx3oA"], "start_seconds": ["30", "40"], "properties": ["background, voice, man", "fall, rain, splash"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a man is speaking with 
background noise and breathing sounds ", "a heavy rain is falling on a surface"], "question": "which entity is more likely to cause water to splash", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["w2M4i1mklOA", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["alarm, gears, turn", "rooster, crow, background, men"], "captions_pred_video": ["footage of an antique clock", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a clock?", "label": 0}, {"captions": ["a child babbles as a woman speaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["wEBlkGWVWwE", "zFjIWfSD-4"], "start_seconds": ["260", "410"], "properties": ["a, babble, woman", "People, motor, brakes"], "captions_pred_video": ["shows a person writing on the whiteboard", null], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a duck quacks continuously"], "sample_ids": ["rwtmaKiCcQU", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["nozzle, depressed, spray can", "quacks, continuously, duck"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["spraying and people speaking", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a car accelerates and wind blows"], "sample_ids": ["zk-xJGQU8-4", "u0TrcHhkPQ"], "start_seconds": 
["130", "20"], "properties": ["food, man, woman", "accelerates, wind, blows"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", null], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["tQWGZLItBXk", "sLUnaPT5gM8"], "start_seconds": ["170", "0"], "properties": ["voice, music, whoosh", "loud, laughter, intermittent"], "captions_pred_video": ["worms revolution screenshots", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["water rushes and then a vehicle zooms past", "a woman speaks as she rubs two objects together"], "sample_ids": ["s4Uz1Ffgo04", "vzxHnu-SFEw"], "start_seconds": ["100", "80"], "properties": ["water, rushes, vehicle", "two objects, woman, speak"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a vehicle zooming past?", "label": 0}, {"captions": ["vehicle engines race around a track as a man commentates", "waves crash against a shoreline and people speak"], "sample_ids": ["sZPuqDgX2V0", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["commentator, race, track", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be seen in a movie", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tqR406bGiE", "w5W5Kqtc8E"], "start_seconds": ["40", "100"], "properties": ["flush, water, gurgle", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is flushed", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about water?", "label": 0}, {"captions": ["male speech with light ticking", "water flows as men speak and yell"], "sample_ids": ["xO-Q2BlIIPU", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["male, speech, ticking", "water, flow, men"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with 
background noise and breathing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "a duck quacks continuously"], "sample_ids": ["ukg5L09Wpvo", "vh30P49Po6s"], "start_seconds": ["150", "30"], "properties": ["a train, a horn, a bell", "quacks, continuously, duck"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a machine beeps continuously"], "sample_ids": ["sWZzXuWYY", "y682ml90jGw"], "start_seconds": ["420", "11"], "properties": ["male, speech, banging", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vfYTJq7nU", "w5W5Kqtc8E"], "start_seconds": ["130", "100"], "properties": ["rustling, ducks, quack", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a duck quacks and a woman speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sfAvvZwdLCY", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "stream, water, 
flow"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage is blurry and out of focus"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking with wind noise in the background "], "question": "which entity is moving water", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vlJS7LN2XyM", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["background, clocks, ticking", "loud, multiple, distance"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a person is whistling", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sIXTftIuUgw", "xBxDz0CFVn0"], "start_seconds": ["90", "30"], "properties": ["person, whistling, person", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman sneezes then speaks", "a small musical boom and then birds tweet and a few dogs pant"], "sample_ids": ["x4dZyf9Gbj0", "y2ZBGpgbhHM"], "start_seconds": ["130", "30"], "properties": ["sneezes, speaks, woman", "birds, tweet, pant"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a woman sneezes and speaks", "birds chirping and a dog panting"], "question": "which entity is more active", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a dark barks and whimpers"], "sample_ids": ["xKB8O8LTs6s", "sYj4hpDUZDQ"], "start_seconds": 
["70", "30"], "properties": ["music, gunfire, explosion", "barks, whimpers, dark"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a brown and white dog standing in front of a wall with its mouth open"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a dog barks and a cat meows"], "question": "which entity is more quiet", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sZPuqDgX2V0", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["commentator, race, track", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "a car speeding up in the distance"], "sample_ids": ["spJCm8tD9Zo", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["snores, wheezes, sleeps", "distance, car, speed"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sofxkNWaP0s", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["wind, engine, louder", "engine, idle, woman"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a woman is speaking and a subway train is moving 
"], "question": "which entity is stationary", "label": 1}, {"captions": ["a toilet flushes and water drains", "water splashes as an animal walks through"], "sample_ids": ["sfAvvZwdLCY", "w1ir-sZ3Im8"], "start_seconds": ["20", "90"], "properties": ["water drains, flushes, water", "animal, water, splashes"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a toilet is flushed", "water splashes and gurgles as people speak"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "some tunes played by whistling"], "sample_ids": ["uWPRNLnpy7Y", "u6BnG6YZqJ4"], "start_seconds": ["10", "0"], "properties": ["accelerate, laugh, vehicle", "tune, play, whistling"], "captions_pred_video": ["is taken from a car driving down the street", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "wind blows as people chatter quietly"], "sample_ids": ["xSKJGCItUWE", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["engine, run, boy", "wind, chatter, people"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage is blurry and out of focus"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["birds chirp and wind blows", "a man talks as something metal hits against and glass is set down"], "sample_ids": ["sxIvBMSavMQ", "x6ijhqRY38s"], "start_seconds": ["210", "250"], "properties": ["birds, chirp, wind", "something metal, glass, hit"], "captions_pred_video": ["beekeeping 101 how to extract 
honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "a chef preparing a dish with a bottle of wine and a plate of food on a table"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking and dishes are clanging "], "question": "which entity is about something hitting something?", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "a man speaks over intermittent keyboard taps"], "sample_ids": ["wSVhSdj0F0", "tw76HGONaKg"], "start_seconds": ["10", "570"], "properties": ["horn honks, keys jingle, slam", "audio, man, keyboard"], "captions_pred_video": [null, "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the 
legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a man speaks and types on a computer keyboard "], "question": "which entity is a recording of a person speaking?", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "a train horn blows as it passes by"], "sample_ids": ["sZvwOuuPGP0", "zVacuqSb4LI"], "start_seconds": ["50", "30"], "properties": ["engine, diesel, truck", "horn, blows, train"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a medium engine is running ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is not a vehicle", "label": 1}, {"captions": ["water running down a sink while a man is talking", "wind blows strongly and a young man speaks"], "sample_ids": ["vSeGhaZt-aI", "vs65y4qmyBE"], "start_seconds": ["50", "340"], "properties": ["water, sink, talk", "wind, blows, strongly"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a car is engulfed in flames on the side of the road"], 
"captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a heavy engine is running and men are speaking "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "people applaud and hoot and chat quietly"], "sample_ids": ["ylpYOorfH4o", "wwyfGO2J4"], "start_seconds": ["410", "90"], "properties": ["engine, run, loud", "people, applaud, hoot"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "people are clapping and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["sWZzXuWYY", "w5W5Kqtc8E"], "start_seconds": ["420", "100"], "properties": ["male, clanks, thumps", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "water is sprayed across a hard surface"], "sample_ids": ["uEU-Hg5MTN8", "sQwlkXjQabo"], "start_seconds": ["27", "10"], 
"properties": ["animal, grunts, snorts", "water, spray, surface"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks while water drains", "propeller rearing loudly with some male and female voices interspersed in the background"], "sample_ids": ["vSeGhaZt-aI", "vqZuVbG6-HI"], "start_seconds": ["50", "130"], "properties": ["water, drain, man", "background, male, female"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a lawn mower is running and men are speaking "], "question": "which entity has more background noise", "label": 1}, {"captions": ["someone snores nearby", "a car speeding up in the distance"], "sample_ids": ["spJCm8tD9Zo", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["someone snores, nearby, someone", "distance, car, speed"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a beep occurs briefly"], "sample_ids": ["sNB8zxXneIM", "xtWeJ56-U-g"], "start_seconds": ["20", "20"], "properties": ["several, quack, cocks", "beep, occur, briefly"], "captions_pred_video": ["a group of geese in a cage", "how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 
79 80 81 82 8"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "mechanisms are ticking and a beep is heard "], "question": "which entity is silent", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "some tunes played by whistling"], "sample_ids": ["xSKJGCItUWE", "u6BnG6YZqJ4"], "start_seconds": ["10", "0"], "properties": ["engine, work, child", "tune, play, whistling"], "captions_pred_video": ["footage of the helicopter flying in the room", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["birds coo incessantly", "people applaud and hoot and chat quietly"], "sample_ids": ["yZrFNS7GFBQ", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["coo, bird, incessant", "people, applaud, hoot"], "captions_pred_video": ["of the bird in the cage", null], "captions_pred_audio": ["an owl hoots in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone is burping continuously", "winds blows roughly as a vehicle races past"], "sample_ids": ["y636gklDioE", "xjvTpk2Zpr8"], "start_seconds": ["20", "70"], "properties": ["burps, burps, burps", "wind, blows, vehicle"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a person burps loudly several times", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["someone snores nearby", "a woman speaks with water running"], "sample_ids": ["spJCm8tD9Zo", "wTideSjRFS0"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "water, running, woman"], "captions_pred_video": ["of a 
man laying on the ground with his mouth open", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking while water is running in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["s59PfAghdkM", "tDVADusiIoc"], "start_seconds": ["0", "60"], "properties": ["bird, chirp, background, horse, neigh", "water, radio, man"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a horse in it?", "label": 0}, {"captions": ["wind blows strongly and a young man speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["vs65y4qmyBE", "xZepNM9qcRA"], "start_seconds": ["340", "30"], "properties": ["wind, blows, strongly", "background, motor, run"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["food is frying and sizzles", "vehicles pass by on a roadway"], "sample_ids": ["zNRChLjqcU", "tgbONvsP47Y"], "start_seconds": ["220", "0"], "properties": ["food is frying, sizzles, food", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["water is running from a faucet into a sink", "a 
car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaking with light rustling", "a vehicle engine runs and someone speaks"], "sample_ids": ["zOZleIRqZm4", "zF8yoL0rkbI"], "start_seconds": ["80", "30"], "properties": ["light, rustling, man", "engine, run, someone"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of the traffic on the street at night"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "the wind is blowing hard and water is splashing"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a clang followed by a toilet flushing", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wNZ5thZM7XU", "tdWhHV3X25Q"], "start_seconds": ["20", "60"], "properties": ["sound, flush, toilet", "applause, audience, yells"], "captions_pred_video": ["footage of a toilet in a bathroom stall", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a toilet flushes", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a woman talks while a baby cries and a man whispers"], "sample_ids": ["vdoxuJn9lTc", "smDKStoHBJo"], "start_seconds": ["40", "0"], "properties": ["burp, loud, girl", "a, talk, baby, cry"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "a man holding a crying baby in his arms"], "captions_pred_audio": ["a child speaks followed by a burp", "a baby is crying and a woman is speaking"], "question": "which entity has a baby?", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "an airplane flies overhead as a woman speaks"], "sample_ids": ["slZLHwNbbt4", "zj2R0XoFr5k"], "start_seconds": ["300", "50"], "properties": ["train, horn, sound", 
"airplane, fly, overhead"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a woman speaks while a helicopter flies overhead "], "question": "which object is flying overhead", "label": 0}, {"captions": ["the revving of an engine throttle followed by a man speaking", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["tezvROoo4bs", "rqu8iB22IY"], "start_seconds": ["40", "5"], "properties": ["audio, throttle, speaking", "sound, repeats, laugh"], "captions_pred_video": ["footage of a busy city street with cars parked on both sides of the road", null], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "a dog barks and a man speaks while music plays "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["several ducks are quacking and squawking", "a child speaks in closed space"], "sample_ids": ["wfHeoPDLMaM", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["quacking, squawking, ducks", "child, space, speak"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the 
barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["ducks are quacking", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "pigeons vocalize and birds chirp"], "sample_ids": ["wTjoRj1se3U", "uiS58TNyUiw"], "start_seconds": ["390", "430"], "properties": ["engine, run, people", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "of the pigeon in the cage"], "captions_pred_audio": ["a jet engine is running and people are talking", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "people speak as gunfire rings out"], "sample_ids": ["rwTERCUno", "wqTCwqVRDlk"], "start_seconds": ["90", "80"], "properties": ["engine, idle, sputter", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["a dog barks and whimpers", "someone whistles a tune"], "sample_ids": ["sShpyu2l4YQ", "sIXTftIuUgw"], "start_seconds": ["0", "90"], "properties": ["barks, whimpers, dog", "someone, tune, whistle"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a person whistling a song"], "question": "which entity is a human", "label": 1}, {"captions": ["a man speaks as a machine runs", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vD6lYD1l0BY", "zl9Dqx-j7q4"], "start_seconds": ["330", "6"], "properties": ["a, 
machine, run", "engine, laugh, loud"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "an infant crying as a woman laughs"], "sample_ids": ["x9JovgqUcs", "xhmRY9yhC7c"], "start_seconds": ["500", "20"], "properties": ["a, man, speaks, keyboard", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vbZ-0lGPneg", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["a woman, a television program, a bird", "multiple, people, yell"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "people applaud and hoot and chat quietly"], "sample_ids": ["vZAw4apG0Es", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["people, clock, converse", "people, applaud, hoot"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["an infant crying as a woman laughs", "people converse as a motor runs and air brakes hiss"], 
"sample_ids": ["xhmRY9yhC7c", "zFjIWfSD-4"], "start_seconds": ["20", "410"], "properties": ["a, laugh, infant", "People, motor, brakes"], "captions_pred_video": ["of a baby crying in a baby bouncer", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["zj2R0XoFr5k", "sQGXqGcwOTc"], "start_seconds": ["50", "3"], "properties": ["airplane, boy, fly", "cling, speak, dishes"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "mechanisms are operating and water is splashing "], "question": "which entity is about a boy speaking?", "label": 0}, {"captions": ["a man sprays as a scraping occurs in the background", "water flows and trickles"], "sample_ids": ["sOa7g-44Dag", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["background, man, spray", "water, flow, trickle"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an aircraft engine runs", "a man speaks as a motor runs in the background"], "sample_ids": ["yLCORCnd35Q", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["engine, aircraft, runs", "background, motor, run"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "a close-up view 
of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a motor?", "label": 1}, {"captions": ["a man speaks while water drains", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vSeGhaZt-aI", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["water, drain, man", "music, gunfire, explosion"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wvKpEYswXO0", "xBxDz0CFVn0"], "start_seconds": ["150", "30"], "properties": ["sound, water, running", "stream, water, flow"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking with wind noise in the background "], "question": "which entity has water flowing", "label": 1}, {"captions": ["a clock ticktocks briefly", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["u7C-AEBQM", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["ticktocks, clock, ticktocks briefly", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a ticktock of a clock", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", 
"label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["u5RmF3c3Aw", "yajyRTUQk3U"], "start_seconds": ["60", "400"], "properties": ["engine, car, zoom", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is about a car zooming by?", "label": 0}, {"captions": ["heavy rain splashes as it falls", "a car accelerates and wind blows"], "sample_ids": ["wP8ZKrlx3oA", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["fall, rain, splash", "accelerates, wind, blows"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", null], "captions_pred_audio": ["a heavy rain is falling on a surface", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "someone is typing on a computer keyboard"], "sample_ids": ["vW4x7S1VfQc", "v0x1odnXtP0"], "start_seconds": ["150", "210"], "properties": ["clacking, oil, woman", "keyboard, type, computer"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "how to make money on youtube in spanish"], "captions_pred_audio": ["food sizzles in a frying pan", "a person is typing on a keyboard"], "question": "which is a type of computer", "label": 1}, {"captions": ["people speak in a closed space", "a child babbles as a woman speaks"], "sample_ids": ["sTpirNYo8vQ", "wEBlkGWVWwE"], "start_seconds": ["30", "260"], "properties": ["people, space, speak", "a, babble, woman"], "captions_pred_video": ["of a man taking a selfie on a bus", "shows a person writing on the whiteboard"], 
"captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a woman is speaking and a child is speaking with background noise and clapping "], "question": "which entity is a child babbles as a woman speaks?", "label": 1}, {"captions": ["women speak and laugh as wind blows", "vehicles pass by on a roadway"], "sample_ids": ["un9VQlzgZM", "tgbONvsP47Y"], "start_seconds": ["5", "0"], "properties": ["wind, speak, laugh", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "water rushes and then a vehicle zooms past"], "sample_ids": ["vJvryTwuAV8", "s4Uz1Ffgo04"], "start_seconds": ["16", "100"], "properties": ["audience, cheer, man", "water, rushes, vehicle"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is more active", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "some tunes played by whistling"], "sample_ids": ["zofjfKhqLk8", "u6BnG6YZqJ4"], "start_seconds": ["10", "0"], "properties": ["background, metal, clings", "tune, play, whistling"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["a train whistle 
keeps going off while the clickety-clack of the train on the rails are continuous", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["ukg5L09Wpvo", "xKB8O8LTs6s"], "start_seconds": ["150", "70"], "properties": ["clickety-clack, train, whistle", "music, gunfire, explosion"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "a car speeding up in the distance"], "sample_ids": ["sHbXC6na9hg", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["a person, saw, wood", "distance, car, speed"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", null], "captions_pred_audio": ["an engine is idling and vibrating", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a baby cries and a woman speaks", "a man speaks in the background while a slow tick repeats"], "sample_ids": ["tMbMDvT50j8", "vZAw4apG0Es"], "start_seconds": ["12", "30"], "properties": ["a, cry, woman", "background, tick, repeat"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a baby cries and a woman speaks", "a clock is ticking and people are talking"], "question": "which entity has a tick repeating in the background?", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["wTideSjRFS0", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["food, sizzle, woman", "a, scream, girl"], 
"captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a woman is speaking and a baby is crying"], "question": "which entity is about a girl speaking?", "label": 1}, {"captions": ["a motorcycle engine is idling", "an airplane flies overhead as a woman speaks"], "sample_ids": ["vZAqdHZ81yA", "zj2R0XoFr5k"], "start_seconds": ["180", "50"], "properties": ["engine, motorcycle, idling", "airplane, fly, overhead"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["an engine is idling loudly", "a woman speaks while a helicopter flies overhead "], "question": "which object is moving", "label": 1}, {"captions": ["male speech with light ticking", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["xO-Q2BlIIPU", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["male, speech, ticking", "beeps, hit, woman"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, 
{"captions": ["an engine revs and a turning noise is made", "wind blows as people chatter quietly"], "sample_ids": ["tOSWIURC-4", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["noise, engine, revs", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a lawn mower is running ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman speaks and dog vocalizes", "frogs croak and vocalize"], "sample_ids": ["uWAAAL4CIoc", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["a, dog, vocalize", "croak, vocalize, frog"], "captions_pred_video": [null, "a close up of a frog in the water"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a frog is croaking"], "question": "which animal is vocalizing", "label": 1}, {"captions": ["someone whistles a song", "dishes cling together then a man begins to speak"], "sample_ids": ["sIXTftIuUgw", "sQGXqGcwOTc"], "start_seconds": ["90", "3"], "properties": ["someone, song, whistle", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a person whistling a song", "mechanisms are operating and water is splashing "], "question": "which entity is a person", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["w2M4i1mklOA", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["loud, chime, bell", "beeps, hit, woman"], "captions_pred_video": ["footage of an antique clock", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the 
wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a beep sounds followed by a child speaking"], "question": "which entity is quieter", "label": 1}, {"captions": ["a dog barks and whimpers", "engines sputter roughly and tires squeal"], "sample_ids": ["sShpyu2l4YQ", "zhx6hoYrHeI"], "start_seconds": ["0", "160"], "properties": ["barks, whimpers, dog", "engine, sputter, rough"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a man working on a motorcycle's tire"], "captions_pred_audio": ["a dog is barking and growling", "a car accelerates and revs its engine "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a woman talking as an infant is crying", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["tMbMDvT50j8", "wyllXV6PjKo"], "start_seconds": ["12", "30"], "properties": ["a, talk, infant", "a baby, a woman, a man"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a woman speaks and a baby cries"], "question": "which entity has a baby?", "label": 1}, {"captions": ["male speech with light ticking", "people applaud and hoot and chat quietly"], "sample_ids": ["xO-Q2BlIIPU", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["male, speech, ticking", "people, applaud, hoot"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in 
the background", "a child speaks in closed space"], "sample_ids": ["yLy-WycbVVE", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["background, people, talk", "child, space, speak"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["s7knHCFW82w", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["blow horn, get close, train", "gun, shoot, water"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a gun", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "water is sprayed across a hard surface"], "sample_ids": ["vcmWSmvti8", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["music, man, fire", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "people speak as gunfire rings out"], "sample_ids": ["zkKdxzNC97Y", "wqTCwqVRDlk"], "start_seconds": ["27", "80"], "properties": ["hard, surface, door", "gunfire, ring, 
speak"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a duck quacks loudly and continuously"], "sample_ids": ["w2M4i1mklOA", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["loud, chime, bell", "loud, continuous, quacks"], "captions_pred_video": ["footage of an antique clock", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "a car speeding up in the distance"], "sample_ids": ["wRV8yMk886E", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["liquid, spray, nozzle", "distance, car, speed"], "captions_pred_video": ["two cars are parked in a parking lot at night", null], "captions_pred_audio": ["a man speaks followed by a loud burst", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tqR406bGiE", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["flush, water, gurgle", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is flushed", "a duck quacks and a woman speaks"], "question": "which entity is about water?", "label": 0}, {"captions": ["children cheer as a man speaks then an audience screams", "a duck quacks continuously"], "sample_ids": ["vJvryTwuAV8", "vh30P49Po6s"], "start_seconds": ["16", "30"], 
"properties": ["audience, cheer, man", "quacks, continuously, duck"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vfYTJq7nU", "xBxDz0CFVn0"], "start_seconds": ["130", "30"], "properties": ["ducks, quack, man", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vqZuVbG6-HI", "vb1fPSDI4c"], "start_seconds": ["130", "30"], "properties": ["background, male, female", "multiple, people, yell"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sofxkNWaP0s", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["wind, engine, louder", "two men, woman, birds"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", null], "captions_pred_audio": ["a man is speaking while a jet engine roars in the 
background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a man talks while vehicles pass by", "an infant crying frantically"], "sample_ids": ["sK4u5T8hW78", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "cry, infant, frantically"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a child yells and another yells", "a stream of water runs briefly"], "sample_ids": ["vMDHu7Lxcgw", "x-PeY8Yb8M4"], "start_seconds": ["410", "300"], "properties": ["two, yell, child", "stream, water, run"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sofxkNWaP0s", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["wind, engine, louder", "airplane, boy, fly"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "footage of a small airplane flying in 
the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a plane flying?", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a motorcycle engine is idling"], "sample_ids": ["zuua6-5goWw", "vZAqdHZ81yA"], "start_seconds": ["30", "180"], "properties": ["birds, chirp, quiet, man, speaks", "engine, motorcycle, idling"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a motorcycle is parked on the side of the road with its rear end facing the viewer"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "an engine is idling loudly"], "question": "which is quieter", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a propeller rotates loudly and intensely"], "sample_ids": ["sSMl2vc3ek", "ugHJF0hfYkg"], "start_seconds": ["20", "10"], "properties": ["a person, laughs, snores", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a person snoring loudly", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a man speaks uses a drill"], "sample_ids": 
["siJFXfGWgDk", "x5eIC7S0fbg"], "start_seconds": ["50", "60"], "properties": ["man, woman, vehicle", "A man is speaking, uses a drill, and is a tool"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a person in surgical gloves is using a needle to remove a small object from a tooth"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking and using a power tool "], "question": "which entity is a tool", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "a duck quacks continuously"], "sample_ids": ["yeFvk9x0wWI", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["clack, bird, chirp", "quacks, continuously, duck"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 0}, {"captions": ["a duck quacks several times", "a toilet flushes and water drains unevenly"], "sample_ids": ["vh30P49Po6s", "vhJWZheqaE"], "start_seconds": ["30", "0"], "properties": ["quacks, duck, several", "water drains unevenly, toilet flushes, water drains"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a small engine idles continuously", "after a few seconds of silence, a loud bang occurs followed by a softer banging noise"], "sample_ids": ["y5WII6cTH7k", "zkKdxzNC97Y"], "start_seconds": ["40", "27"], "properties": ["engine, idle, continuously", "loud, bang, noise"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "footage of the door opening and closing in slow motion"], "captions_pred_audio": 
["an engine is knocking and vibrating ", "a door is opened and closed"], "question": "which entity is louder", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sncRqQ67iJU", "wz7N8YRy74I"], "start_seconds": ["460", "30"], "properties": ["loud, repeatedly, man", "rooster, crow, background, men"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a person is snoring", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man?", "label": 0}, {"captions": ["speaking following by laughing and clapping", "a door slams shut roughly"], "sample_ids": ["u2f5NpsoHBg", "zkKdxzNC97Y"], "start_seconds": ["30", "27"], "properties": ["person, laugh, clap", "a door, slams, shut"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a door is opened and closed"], "question": "which entity is more likely to be a door", "label": 1}, {"captions": ["people speak then an engine runs", "a horn rings out as a machine runs by"], "sample_ids": ["uMTTDZ2mb4", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["engine, run, people", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a baby laughs giddily and a woman laughs then speaks"], "sample_ids": 
["sU53zg9Jp7s", "wjsXBsc7M40"], "start_seconds": ["380", "10"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "a baby laughs, a woman laughs, a woman speaks"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "footage of the baby playing with a toothbrush"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a baby laughs and a woman speaks"], "question": "which entity is more playful", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["vveS8HT7Uog", "wwyfGO2J4"], "start_seconds": ["100", "90"], "properties": ["a man, objects, speak", "people, applaud, hoot"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "people are clapping and speaking with background noise "], "question": "which entity is a group of people?", "label": 1}, {"captions": ["male speech with light ticking", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xO-Q2BlIIPU", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["male, speech, ticking", "engine, laugh, loud"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "a duck quacks loudly and continuously"], "sample_ids": ["yZrFNS7GFBQ", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["pigeon, buzzes, insect", "loud, continuous, quacks"], "captions_pred_video": ["of the bird in the cage", "of a man brushing his teeth with a 
toothbrush in his mouth"], "captions_pred_audio": ["an owl hoots in the background ", "a duck is quacking loudly"], "question": "which animal is making a noise", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "water flows as men speak and yell"], "sample_ids": ["spYNpeN7rPY", "vJ7JPEFhyLA"], "start_seconds": ["1", "16"], "properties": ["a clock, ticktock, man", "water, flow, men"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["zcDwZ6W7E3E", "uYT5gxnyMWM"], "start_seconds": ["180", "50"], "properties": ["a, man, speak", "a, scream, girl"], 
"captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a small engine idles continuously"], "sample_ids": ["s4Uz1Ffgo04", "y5WII6cTH7k"], "start_seconds": ["100", "40"], "properties": ["roars, background, people speaking", "engine, idle, continuously"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a sewing machine stitching a red and white hat"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "an engine is knocking and vibrating "], "question": "which entity is quieter", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["rqu8iB22IY", "tDVADusiIoc"], "start_seconds": ["5", "60"], "properties": ["sound, repeats, laugh", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "someone sprays a liquid onto a hard surface making a hiss sound"], "sample_ids": ["wztCSUxOf8", "zO-LSSY92ZM"], "start_seconds": ["130", "30"], "properties": ["a crowd, yells, applauds", "liquid, surface, sound"], "captions_pred_video": [null, "youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to 
clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "steam is hissing and hissing"], "question": "which entity is not a crowd?", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "small dogs yip and bark sharply"], "sample_ids": ["tPJvjq9QePY", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["animal, bleat, moo", "bark, yip, sharply"], "captions_pred_video": ["a dog and a sheep in a barn", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a baby cries and a man speaks", "a dog barks and growls"], "question": "which entity is more vocal", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a weapon fires multiple times"], "sample_ids": ["xSKJGCItUWE", "sMC07Ucy7kg"], "start_seconds": ["10", "10"], "properties": ["engine, run, boy", "weapon, fire, multiple"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage is from a car's point of view"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a 
door slams shut and an object moves on a hard surface", "waves crash against a shoreline and people speak"], "sample_ids": ["zkKdxzNC97Y", "yFB25fqfU8I"], "start_seconds": ["27", "300"], "properties": ["hard, surface, door", "wave, crash, shoreline"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be a natural phenomenon", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["un9VQlzgZM", "zl9Dqx-j7q4"], "start_seconds": ["5", "6"], "properties": ["females, talk, laugh", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man talks as several small engines run", "some men converse over an engine running"], "sample_ids": ["u9A6VZQCZpU", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["a, man, talk", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows a man talking?", "label": 0}, {"captions": ["an airplane engine runs", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yVPZ2MNWpms", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["engine, airplane, runs", "a woman, laughs, animal"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a car 
is driving by on the road ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "someone is typing on a computer keyboard"], "sample_ids": ["uEU-Hg5MTN8", "v0x1odnXtP0"], "start_seconds": ["27", "210"], "properties": ["a woman, laughs, animal", "keyboard, type, computer"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["sapQIQUhFc", "xfaoyyzw2WU"], "start_seconds": ["280", "180"], "properties": ["liquid, flow, distance", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "plastic is tapped on while someone speaks"], "sample_ids": ["vW4x7S1VfQc", "wvKpEYswXO0"], "start_seconds": ["150", "150"], "properties": ["clacking, oil, woman", "plastic, tap, speak"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "of the person preparing food in the kitchen"], "captions_pred_audio": ["food sizzles in a frying pan", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks?", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "a loud snarling engine is followed by a man laughing"], 
"sample_ids": ["t8CV69hcvF0", "zl9Dqx-j7q4"], "start_seconds": ["210", "6"], "properties": ["person, sneeze, follow", "engine, laugh, loud"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman sneezes and speaks", "a jet engine roars "], "question": "which entity is followed by a person speaking", "label": 0}, {"captions": ["a male is speaking and a duck quacks as others laugh", "water splashes as an animal walks through"], "sample_ids": ["tiDFTC-5vU", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["male, duck, laugh", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["uZesmtKZGSw", "su6FAOcOA8c"], "start_seconds": ["250", "4"], "properties": ["men, talk, cars", "engine, idle, woman"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a woman is speaking and a subway train is moving 
"], "question": "which entity is stationary", "label": 1}, {"captions": ["electronic beeps occur in a short series", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["y682ml90jGw", "ziUT9IFTkjg"], "start_seconds": ["11", "10"], "properties": ["beeps, series, electronic", "background, birds, rustling"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "birds are chirping and a chime is ringing "], "question": "which entity is more natural", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vfYTJq7nU", "zj2R0XoFr5k"], "start_seconds": ["130", "50"], "properties": ["rustling, ducks, quack", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about flying", "label": 1}, {"captions": ["paper is crumpling consistently", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["v5cSxLaHADY", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "rooster, crow, background, men"], "captions_pred_video": ["footage of the person holding a pair of scissors", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["paper is crumpled and crinkled", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a video", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "sirens ring and approach with humming of distant traffic"], "sample_ids": 
["xKB8O8LTs6s", "xERFUeZONz8"], "start_seconds": ["70", "0"], "properties": ["music, gunfire, explosion", "ring, approach, traffic"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage is blurry due to camera shake or motion blur"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "an emergency vehicle siren blares"], "question": "which entity is more calm", "label": 1}, {"captions": ["bees buzz as wind blows", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["tMJne1a4AFI", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["bees, buzz, wind", "a woman, a television program, a bird"], "captions_pred_video": ["a swarm of bees on the ground", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a swarm of bees buzzing around", "a woman is speaking and a dog is whimpering"], "question": "which entity is a still image?", "label": 0}, {"captions": ["a man speaks in the background while a slow tick repeats", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["vZAw4apG0Es", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["background, tick, repeat", "beeps, hit, woman"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a clock is ticking and people are talking", "a beep sounds followed by a child speaking"], "question": 
"which entity has a woman talking?", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zj2R0XoFr5k", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["airplane, fly, overhead", "music, gunfire, explosion"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a clock ticktocks"], "sample_ids": ["su6FAOcOA8c", "v-g-j2uTByM"], "start_seconds": ["4", "30"], "properties": ["engine, idle, woman", "ticktocks, clock, ticktocks"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a person screams glaringly", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xC8kbrKJmco", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["glaringly, screams, person", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a goat is bleating ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["water is sprayed across a hard surface", "paper folding and crinkling"], "sample_ids": ["sQwlkXjQabo", "zPpG3RD8lSs"], "start_seconds": ["10", "20"], "properties": ["water, spray, surface", "paper, fold, crinkle"], "captions_pred_video": ["a close-up of a red car with 
water droplets on the hood", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["spraying followed by silence", "the wind blows and a mouse clicks "], "question": "which entity is not a liquid", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "water runs from a faucet while some men speak and the water runs in the sink"], "sample_ids": ["s6DESzUTGjY", "vzceMbklWc"], "start_seconds": ["16", "180"], "properties": ["wind, laugh, woman", "water, faucet, sink"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set 
up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", null], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "water is running and a man is speaking"], "question": "which entity is a video of a woman laughing?", "label": 0}, {"captions": ["a man speaks while water drains", "small dogs yip and bark sharply"], "sample_ids": ["vSeGhaZt-aI", "v-wcQf4BDY0"], "start_seconds": ["50", "120"], "properties": ["water, drain, man", "bark, yip, sharply"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["voJh2gJxXhA", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["music, frog, croak", "a woman, something, fried"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "- a woman cooking in the kitchen"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a woman is speaking while food is frying in the background"], "question": "which entity is about a woman talking?", "label": 1}, {"captions": ["a toilet door squeaks as it is opened", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sdXV-ylviw", "zl9Dqx-j7q4"], "start_seconds": ["190", "6"], "properties": ["door, toilet, squeaks", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["an 
small aircraft engine runs and a boy speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xSKJGCItUWE", "yajyRTUQk3U"], "start_seconds": ["10", "400"], "properties": ["engine, run, boy", "a woman, something, fried"], "captions_pred_video": ["footage of the helicopter flying in the room", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["an infant crying frantically", "waves crash against a shoreline and people speak"], "sample_ids": ["zwOBqeFTgiU", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["cry, infant, frantically", "wave, crash, shoreline"], "captions_pred_video": ["of the baby crying in the car seat", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a baby cries loudly", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "some tunes played by whistling"], "sample_ids": ["wqZ135Ssz0", "u6BnG6YZqJ4"], "start_seconds": ["60", "0"], "properties": ["two men, woman, birds", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["birds fly and flutter around", "a clock alarm sounds and gears turn"], "sample_ids": ["wGKgwOP3h30", "w2M4i1mklOA"], "start_seconds": ["30", "30"], "properties": ["fly, flutter, around", "alarm, gears, turn"], "captions_pred_video": ["of the pigeons in the coop", "footage of an antique clock"], "captions_pred_audio": ["pigeons coo 
and flap their wings", "a clock is ticking and a bell is ringing "], "question": "which entity is a clock?", "label": 1}, {"captions": ["birds chirp then an animal grunts", "several insects fly while two men talk"], "sample_ids": ["tDlysoZiA1I", "s-T9OVOiMLo"], "start_seconds": ["0", "330"], "properties": ["animal, grunt, chirp", "several, fly, men"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about animals?", "label": 0}, {"captions": ["a airplane flies overhead as a woman speaks", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["zj2R0XoFr5k", "y8WEcpOlT3I"], "start_seconds": ["50", "40"], "properties": ["airplane, fly, woman", "harsh, wind, blows"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a man is speaking with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["multiple ducks quack continuously", "people speak as gunfire rings out"], "sample_ids": ["wfHeoPDLMaM", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["multiple, quack, continuously", "gunfire, ring, speak"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the 
middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["ducks are quacking", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["birds vocalize and a man speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["v0wPrLBI3hg", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["vocalize, bird, speak", "people, applaud, hoot"], "captions_pred_video": ["footage of the pigeons feeding on the ground", null], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "people speak as gunfire rings out"], "sample_ids": ["vSeGhaZt-aI", "wqTCwqVRDlk"], "start_seconds": ["50", "80"], "properties": ["water, bubbles, speak", "gunfire, ring, speak"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wRBHTgrbiwg", "tDVADusiIoc"], "start_seconds": ["50", "60"], "properties": ["bird, owl, speak", "water, radio, man"], 
"captions_pred_video": ["of a bee pollinating the flowers in the field", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "water pouring and bubbling"], "sample_ids": ["t97k0cejSQE", "uyRfq-jKPpo"], "start_seconds": ["250", "50"], "properties": ["bird, chirp, insect", "water, bubbles, pouring"], "captions_pred_video": ["a bee on a purple thistle flower", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "water is running from a faucet"], "question": "which entity is more quiet", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a horn rings out as a machine runs by"], "sample_ids": ["sZPuqDgX2V0", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["commentator, race, track", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a train is moving and blowing its horn with a 
clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vqZuVbG6-HI", "uZesmtKZGSw"], "start_seconds": ["130", "250"], "properties": ["background, male, female", "men, talk, cars"], "captions_pred_video": ["footage is blurry because it's raining outside", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a toilet door squeaks as it is opened", "water flows and trickles"], "sample_ids": ["sdXV-ylviw", "tB7hWb9gTuQ"], "start_seconds": ["190", "30"], "properties": ["door, toilet, squeaks", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a dog barks and taps with background noise ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as horns blow", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tHyNqRyK34A", "tiDFTC-5vU"], "start_seconds": ["24", "30"], "properties": ["a, man, speaks", "male, duck, laugh"], "captions_pred_video": ["being taken 
from inside a vehicle on the street at night", null], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck in it?", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["w8uLijTqtlU", "yks4cLgIDMc"], "start_seconds": ["70", "170"], "properties": ["wind, microphone, noise", "background, speaking, child"], "captions_pred_video": ["footage is blurry and shaky", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking and a child is crying"], "question": "which entity is a recording", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "people applaud and hoot and chat quietly"], "sample_ids": ["sTpirNYo8vQ", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["a, tone, fast", "people, applaud, hoot"], "captions_pred_video": ["of a man taking a selfie on a bus", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a car speeding up in the distance"], "sample_ids": ["vqZuVbG6-HI", "u0TrcHhkPQ"], "start_seconds": ["130", "20"], "properties": ["background, male, female", "distance, car, speed"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "a machine beeps continuously"], "sample_ids": 
["vb1fPSDI4c", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["multiple, people, yell", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["a crowd of people are talking and laughing", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "vehicle engines race around a track as a man commentates"], "sample_ids": ["w2JXXIAdUdg", "sZPuqDgX2V0"], "start_seconds": ["10", "30"], "properties": ["snoring, distance, person", "commentator, race, track"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", null], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking and a helicopter is flying overhead "], "question": "which entity is a video of a race?", "label": 1}, {"captions": ["a weapon fires multiple times", "an airplane engine runs"], "sample_ids": ["sMC07Ucy7kg", "yVPZ2MNWpms"], "start_seconds": ["10", "0"], "properties": ["weapon, fire, multiple", "engine, airplane, runs"], "captions_pred_video": ["footage is from a car's point of view", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a car is driving by on the road "], "question": "which entity is not a weapon", "label": 1}, {"captions": ["humming of idling and revving engine with a man speaking", "water flows as men speak and yell"], "sample_ids": ["wqADXCzngMw", "vJ7JPEFhyLA"], "start_seconds": ["340", "16"], "properties": ["audio, humming, revving", "water, flow, men"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity 
is a video", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "water splashes as an animal walks through"], "sample_ids": ["vSeGhaZt-aI", "w1ir-sZ3Im8"], "start_seconds": ["50", "90"], "properties": ["water, bubbles, speak", "animal, water, splashes"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tQWGZLItBXk", "uZesmtKZGSw"], "start_seconds": ["170", "250"], "properties": ["voice, music, whoosh", "men, talk, cars"], "captions_pred_video": ["worms revolution screenshots", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["scraping and female speech with distant music", "a man speaks as a car is passing by"], "sample_ids": ["yHeVV-xeOxQ", "sK4u5T8hW78"], "start_seconds": ["130", "30"], "properties": ["female, speech, music", "a, car, pass"], 
"captions_pred_video": ["of a girl milking a goat's udder", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["a power tool runs and touches a surface", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zfvPRf3chY", "uEU-Hg5MTN8"], "start_seconds": ["290", "27"], "properties": ["power tool, run, touch", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["water running down a sink while a man is talking", "paper folding and crinkling"], "sample_ids": ["vSeGhaZt-aI", "zPpG3RD8lSs"], "start_seconds": ["50", "20"], "properties": ["water, sink, talk", "paper, fold, crinkle"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece 
of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["a cat meows and children speak", "a stream of water runs briefly"], "sample_ids": ["x5cuQjOdM3E", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["cat, speak, children", "stream, water, run"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a cat meows and a woman speaks", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sShpyu2l4YQ", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["growl, bark, yip", "a, scream, girl"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "a machine beeps continuously"], "sample_ids": ["w34HjHr6gAY", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["beeps, squawk, child speaking", "beeps, machine, continuously"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 
1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a beeping sound is being made "], "question": "which entity is a machine?", "label": 1}, {"captions": ["several insects fly while two men talk", "someone is typing on a computer keyboard"], "sample_ids": ["s-T9OVOiMLo", "v0x1odnXtP0"], "start_seconds": ["330", "210"], "properties": ["several, fly, men", "keyboard, type, computer"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a toilet flushes and water drains"], "sample_ids": ["v0x1odnXtP0", "sfAvvZwdLCY"], "start_seconds": ["210", "20"], "properties": ["keyboard, type, computer", "water drains, flushes, water"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a person is typing on a keyboard", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sSMl2vc3ek", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["a person, laughs, snores", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], 
"captions_pred_audio": ["a person snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person?", "label": 0}, {"captions": ["animals bleat and cry out and then a woman speaks", "some tunes played by whistling"], "sample_ids": ["yZp6xizR0yU", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["animal, bleat, cry", "tune, play, whistling"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "winds blows roughly as a vehicle races past"], "sample_ids": ["xzKKf9bKNUo", "xjvTpk2Zpr8"], "start_seconds": ["10", "70"], "properties": ["background, noise, snoring", "wind, blows, vehicle"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a person snoring loudly", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["t97k0cejSQE", "xKB8O8LTs6s"], "start_seconds": ["250", "70"], "properties": ["bird, chirp, insect", "music, gunfire, explosion"], "captions_pred_video": ["a bee on a purple thistle flower", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["an insect buzzes around continuously", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": 
["v25l1jef3JY", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["buzzes, continuously, insect", "animal, grunts, snorts"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a woman is speaking and a baby is crying"], "question": "which entity is not a person?", "label": 0}, {"captions": ["multiple insects buzz over rustling wind", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["tMJne1a4AFI", "xV7Mg1QucSc"], "start_seconds": ["0", "14"], "properties": ["wind, buzz, rustling", "alarm, ticktocks, laughs"], "captions_pred_video": ["a swarm of bees on the ground", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a swarm of bees buzzing around", "an alarm clock ticks and a woman laughs"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["uRExseg-0XI", "ziUT9IFTkjg"], "start_seconds": ["210", "10"], "properties": ["woman, man, water", "background, birds, rustling"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", null], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "birds are chirping and a chime is ringing "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["continuous chugging with birds chirping in the background", "a person uses a saw to cut some wood"], "sample_ids": ["xM4joTqDVp4", "sHbXC6na9hg"], "start_seconds": ["160", "0"], "properties": ["background, chirp, birds", "a person, saw, wood"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's 
smokestack", "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["birds are chirping and a train is moving ", "an engine is idling and vibrating"], "question": "which entity is a person", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sQwlkXjQabo", "su6FAOcOA8c"], "start_seconds": ["10", "4"], "properties": ["water, spray, surface", "engine, idle, woman"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["spraying followed by silence", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a dark barks and whimpers", "a toilet door squeaks as it is opened"], "sample_ids": ["sYj4hpDUZDQ", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["barks, whimpers, dark", "door, toilet, squeaks"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", null], "captions_pred_audio": ["a dog barks and a cat meows", "a dog barks and taps with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "people applaud and hoot and chat quietly"], "sample_ids": ["xM4joTqDVp4", "wwyfGO2J4"], "start_seconds": ["160", "90"], "properties": ["background, chirp, birds", "people, applaud, hoot"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", null], "captions_pred_audio": ["birds are chirping and a train is moving ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "a stream of water runs briefly"], "sample_ids": 
["zVacuqSb4LI", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["blares, fades, train", "stream, water, run"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a motorcycle engine is idling", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vZAqdHZ81yA", "xfaoyyzw2WU"], "start_seconds": ["180", "180"], "properties": ["engine, motorcycle, idling", "loud, jet engine, roar"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["an engine is idling loudly", "an aircraft engine roars and a man speaks "], "question": "which engine is louder", "label": 1}, {"captions": ["a baby laugh at a sputter", "an insect buzzes around continuously"], "sample_ids": ["sLUnaPT5gM8", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["laugh, sputter, baby", "buzzes, continuously, insect"], "captions_pred_video": ["of a baby laying on his 
stomach in a blue shirt and diaper", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "dog barking and vehicle engine idling followed shortly by vehicle engine revving"], "sample_ids": ["w2JXXIAdUdg", "zY3icUyMdh8"], "start_seconds": ["10", "20"], "properties": ["snoring, distance, person", "dog, bark, engine"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a car is driving and dogs are barking and squealing "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a infant makes noise and is excited", "a machine beeps continuously"], "sample_ids": ["wIJK3-5y0kA", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["noise, excited, infant", "beeps, machine, continuously"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["birds chirp as a bell rings", "speaking following by laughing and clapping"], "sample_ids": ["ziUT9IFTkjg", "u2f5NpsoHBg"], "start_seconds": ["10", "30"], "properties": ["chirp, bell, ring", "person, laugh, clap"], "captions_pred_video": [null, "is being projected on a screen at the front of the stage"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a woman is speaking and a crowd is clapping"], "question": "which entity is more likely to be a person", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a child speaks in closed 
space"], "sample_ids": ["w2M4i1mklOA", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["alarm, gears, turn", "child, space, speak"], "captions_pred_video": ["footage of an antique clock", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "birds chirp and an owl hoots before a man speaks briefly"], "sample_ids": ["spYNpeN7rPY", "wRBHTgrbiwg"], "start_seconds": ["1", "50"], "properties": ["a clock, ticktock, man", "bird, owl, speak"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "birds are chirping and insects are buzzing"], "question": "which entity has a clock 
ticking?", "label": 0}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["ukg5L09Wpvo", "zFjIWfSD-4"], "start_seconds": ["150", "410"], "properties": ["a train, a horn, a bell", "People, motor, brakes"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", null], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor?", "label": 1}, {"captions": ["people speak softly as food sizzles", "a pigeon cooing as an insect buzzes by briefly"], "sample_ids": ["yhQ2Lg-7qDY", "yZrFNS7GFBQ"], "start_seconds": ["130", "30"], "properties": ["food, sizzle, speak", "pigeon, buzzes, insect"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "of the bird in the cage"], "captions_pred_audio": ["a faucet is running and a man is speaking", "an owl hoots in the background "], "question": "which entity is a bird?", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "people applaud and hoot and chat quietly"], "sample_ids": ["sxYkFKFIZD0", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["screech, man, door", "people, applaud, hoot"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, 
{"captions": ["an insect buzzes around continuously", "water runs into a sink while men speak"], "sample_ids": ["v25l1jef3JY", "vzceMbklWc"], "start_seconds": ["0", "180"], "properties": ["buzzes, continuously, insect", "water, sink, run"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "water is running and a man is speaking"], "question": "which entity is not a living thing", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "people cheer as a vehicle engine revs"], "sample_ids": ["tw76HGONaKg", "xjhAnI2q6hM"], "start_seconds": ["570", "6"], "properties": ["music, click, man", "engine revs, vehicle, people"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "someone is typing on a computer 
keyboard"], "sample_ids": ["s7knHCFW82w", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["blow horn, get close, train", "keyboard, type, computer"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "how to make money on youtube in spanish"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a person is typing on a keyboard"], "question": "which is not a type of computer", "label": 0}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["uEU-Hg5MTN8", "vfYTJq7nU"], "start_seconds": ["27", "130"], "properties": ["animal, grunts, snorts", "rustling, ducks, quack"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 0}, {"captions": ["a speedboat passes quickly on the water", "an infant crying as a woman laughs"], "sample_ids": ["tjmoSi330GM", "xhmRY9yhC7c"], "start_seconds": ["23", "20"], "properties": ["speed, water, boat", "a, laugh, infant"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a baby cries and a woman speaks"], "question": "which is not a person", "label": 0}, {"captions": ["white noise and snoring with some rustling in the background", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["xzKKf9bKNUo", "ukg5L09Wpvo"], "start_seconds": ["10", "150"], "properties": ["background, noise, snoring", "clickety-clack, train, whistle"], "captions_pred_video": 
["shows a woman laying on a bed with her eyes closed and her mouth open", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a person snoring loudly", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a frog croaks as other frogs croak in the background"], "sample_ids": ["sa6TLVbooCc", "yswmmRZFItk"], "start_seconds": ["240", "0"], "properties": ["people, laugh, child", "background, frog, croak"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "a close up of a frog in the water"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a frog is croaking"], "question": "which entity is a croaking animal?", "label": 1}, {"captions": ["a man speaks as horns blow", "people applaud and hoot and chat quietly"], "sample_ids": ["tHyNqRyK34A", "wwyfGO2J4"], "start_seconds": ["24", "90"], "properties": ["a, man, speaks", "people, applaud, hoot"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", null], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "a duck quacks continuously"], "sample_ids": ["wTideSjRFS0", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["food, sizzle, woman", "quacks, continuously, duck"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the 
sink", "an airplane engine runs"], "sample_ids": ["vzceMbklWc", "yVPZ2MNWpms"], "start_seconds": ["180", "0"], "properties": ["water, faucet, sink", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["water is running and a man is speaking", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["goats bleat and people speak", "a duck quacks continuously"], "sample_ids": ["z5iUE5h0EPs", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["goats bleat, people speak, language", "quacks, continuously, duck"], "captions_pred_video": ["of the goat in the barn", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a goat bleats and a man speaks", "a duck is quacking loudly"], "question": "which animal is speaking", "label": 1}, {"captions": ["an infant crying as a woman laughs", "rain falls on a surface as men speak and thunder roars"], "sample_ids": ["xhmRY9yhC7c", "w0xsN8X18Y"], "start_seconds": ["20", "30"], "properties": ["a, laugh, infant", "rain, thunder, surface"], "captions_pred_video": ["of a baby crying in a baby bouncer", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while a motorboat is moving in the background "], "question": "which entity is a natural event", "label": 1}, {"captions": ["water bubbles and gurgles.", "a man speaks as a car is passing by"], "sample_ids": ["tB7hWb9gTuQ", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["bubbles, gurgles, water", "a, car, pass"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["water is splashing and gurgling", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a moving object", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a woman speaks as she rubs two objects together"], "sample_ids": ["vfYTJq7nU", "vzxHnu-SFEw"], "start_seconds": ["130", "80"], "properties": ["rustling, ducks, quack", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["a door slams shut roughly", "several insects fly while two men talk"], "sample_ids": ["zkKdxzNC97Y", "s-T9OVOiMLo"], "start_seconds": ["27", "330"], 
"properties": ["a door, slams, shut", "several, fly, men"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more likely to be in a zoo", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "someone snores nearby"], "sample_ids": ["vBslzh7saPw", "spJCm8tD9Zo"], "start_seconds": ["90", "90"], "properties": ["power, scream, increase", "someone snores, nearby, someone"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a person is snoring loudly"], "question": "which is quieter", "label": 0}, {"captions": ["birds coo incessantly", "a duck quacks continuously"], "sample_ids": ["yZrFNS7GFBQ", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["coo, bird, incessant", "quacks, continuously, duck"], "captions_pred_video": ["of the bird in the cage", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["an owl hoots in the background ", "a duck is quacking loudly"], "question": "which bird is more likely to be a male", "label": 1}, {"captions": ["wind blowing followed by a zoom", "water splashes as an animal walks through"], "sample_ids": ["vr8ZXjEBhMQ", "w1ir-sZ3Im8"], "start_seconds": ["150", "90"], "properties": ["wind, blow, zoom", "animal, water, splashes"], "captions_pred_video": ["is taken from a motorcycle's point of view", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a bird is 
chirping and tweeting a bird song", "a clock ticks quietly and rhythmically"], "sample_ids": ["wPz6QRAkEb4", "u7C-AEBQM"], "start_seconds": ["60", "30"], "properties": ["chirps, tweets, song", "ticks, rhythmic, quiet"], "captions_pred_video": ["a bird in a cage on top of a pole", null], "captions_pred_audio": ["birds are chirping in the background ", "a ticktock of a clock"], "question": "which entity is quieter", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "small dogs yip and bark sharply"], "sample_ids": ["sLUnaPT5gM8", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["loud, laughter, intermittent", "bark, yip, sharply"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "an infant crying as a woman laughs"], "sample_ids": ["wqUmIEzuNz4", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["frog, bird, vocalize", "a, laugh, infant"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a cat meows and rustles", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["speaking following by laughing and clapping", "women speak as water runs briefly, children call out, and a man speaks"], "sample_ids": ["u2f5NpsoHBg", "uRExseg-0XI"], "start_seconds": ["30", "210"], "properties": ["person, laugh, clap", "woman, man, water"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon"], "captions_pred_audio": ["a woman is 
speaking and a crowd is clapping", "a man is speaking while water is running and birds are chirping "], "question": "which entity has more people", "label": 1}, {"captions": ["a clock ticktocks briefly", "an airplane engine spools and people speak"], "sample_ids": ["u7C-AEBQM", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["ticktocks, clock, ticktocks briefly", "airplane, engine, spool"], "captions_pred_video": [null, "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a ticktock of a clock", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["someone whistles briefly", "some men converse over an engine running"], "sample_ids": ["uFoga8sHpiw", "sCiy7QS1U"], "start_seconds": ["90", "300"], "properties": ["sound, duration, pitch", "men, converse, engine"], "captions_pred_video": ["footage of a bird in a cage", null], "captions_pred_audio": ["a person whistles a song", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is longer", "label": 1}, {"captions": ["a person is snoring while sleeping", "an airplane engine spools and people speak"], "sample_ids": ["vJrjSeP17yE", "wTjoRj1se3U"], "start_seconds": ["40", "390"], "properties": ["a person is sleeping, snoring, person", "airplane, engine, spool"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a person snoring loudly", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["a rumble grows louder", "a man speaks as a car is passing by"], "sample_ids": ["y4MY9mp8-TA", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["loudness, increase, rumble", "a, car, pass"], "captions_pred_video": ["a helicopter flying in the sky", "for 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a helicopter flies overhead ", "a man is speaking with background noise and breathing sounds "], "question": "which is not a rumble", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a infant makes noise and is excited"], "sample_ids": ["xKB8O8LTs6s", "wIJK3-5y0kA"], "start_seconds": ["70", "30"], "properties": ["music, gunshots, explosion", "noise, excited, infant"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motorcycle engine works nearby", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["tOSWIURC-4", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["engine, work, nearby", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a lawn mower is running ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program playing far away?", "label": 1}, {"captions": ["water is sprayed across a hard surface", "someone sprays a liquid onto a hard surface making a hiss sound"], "sample_ids": ["sQwlkXjQabo", "zO-LSSY92ZM"], "start_seconds": ["10", "30"], "properties": ["water, spray, surface", "liquid, surface, sound"], "captions_pred_video": ["a 
close-up of a red car with water droplets on the hood", "youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'"], "captions_pred_audio": ["spraying followed by silence", "steam is hissing and hissing"], "question": "which entity is sprayed across a hard surface", "label": 0}, {"captions": ["an small aircraft engine runs and a boy speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["xSKJGCItUWE", "vb1fPSDI4c"], "start_seconds": ["10", "30"], "properties": ["engine, run, boy", "multiple, people, yell"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["loud, continuous burping", "propeller rearing loudly with some male and female voices interspersed in the background"], "sample_ids": ["y636gklDioE", "vqZuVbG6-HI"], "start_seconds": ["20", "130"], "properties": ["loud, continuous, burping", "background, male, female"], "captions_pred_video": ["a dog sitting on a red chair in front 
of an old telephone", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a person burps loudly several times", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a child speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["yW6FWLSLkx4", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["a, child, speaks", "stream, water, flow"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a horn rings out as a machine runs by"], "sample_ids": ["zk-xJGQU8-4", "slZLHwNbbt4"], "start_seconds": ["130", "300"], "properties": ["food, man, woman", "a, horn, run"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["w2M4i1mklOA", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["alarm, gears, turn", "a woman, laughs, animal"], "captions_pred_video": ["footage of an antique clock", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a woman is speaking and a baby is crying"], "question": "which entity is a clock?", "label": 0}, {"captions": ["a woman sneezes then 
speaks", "wind blowing followed by a zoom"], "sample_ids": ["x4dZyf9Gbj0", "vr8ZXjEBhMQ"], "start_seconds": ["130", "150"], "properties": ["sneezes, speaks, woman", "wind, blow, zoom"], "captions_pred_video": ["footage is blurry and out of focus", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman sneezes and speaks", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["wDVMhEdTiVw", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["gun, shoot, water", "engine, revs, vehicle"], "captions_pred_video": ["a blurry image of trees and water in the forest", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a race car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a man speaks as a vehicle engine idles", "a train horn blows as it passes by"], "sample_ids": ["shmR4OZtzqA", "zVacuqSb4LI"], "start_seconds": ["30", "30"], "properties": ["man, engine, idle", "horn, blows, train"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "by 
mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a man speaks while a motor runs", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zgUgkpk78xU", "vb1fPSDI4c"], "start_seconds": ["70", "30"], "properties": ["clinking, humming, horn", "multiple, people, yell"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["people speak and laugh as a child speaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sa6TLVbooCc", "zFjIWfSD-4"], "start_seconds": ["240", "410"], "properties": ["people, laugh, 
child", "People, motor, brakes"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", null], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a child speaking?", "label": 0}, {"captions": ["a woman and man are speaking", "a machine beeps continuously"], "sample_ids": ["vbpKkWvfOu4", "y682ml90jGw"], "start_seconds": ["560", "11"], "properties": ["two people, speaking, woman, man", "beeps, machine, continuously"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["an audience gives applause", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["x6iCUDmRpKQ", "tdWhHV3X25Q"], "start_seconds": ["38", "60"], "properties": ["applause, audience, give", "applause, audience, yells"], "captions_pred_video": ["a black background with the moon and stars in the sky", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a group of people are clapping and cheering", "a man is speaking and a crowd is clapping"], "question": "which audience is giving applause", "label": 1}, {"captions": ["continuous snoring", "plastic is tapped on while someone speaks"], "sample_ids": ["sLkeqCDJIyw", "wvKpEYswXO0"], "start_seconds": ["120", "150"], "properties": ["loud, snoring, noise", "plastic, tap, speak"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "women speak as water runs briefly, children call out, and a man speaks"], "sample_ids": ["uEU-Hg5MTN8", "uRExseg-0XI"], "start_seconds": ["27", "210"], "properties": ["animal, grunts, snorts", "woman, man, water"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking while water is running and birds are chirping "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "paper is crumpling consistently"], "sample_ids": ["ujMt0-D-x2k", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["snoring, rhythmical, nearby", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of the dog playing with a toy on the floor", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a person is snoring loudly", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "some men converse over an engine running"], "sample_ids": ["sjlVMgdGSK0", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["car, revving, loudly", "men, converse, engine"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man speaks while a motorcycle revs and accelerates "], "question": "which is not a car", 
"label": 1}, {"captions": ["a man speaks as a scratching occurs", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sOa7g-44Dag", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["audio, scratching, man", "rustling, ducks, quack"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", null], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a duck quacks and a woman speaks"], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a airplane flies overhead as a woman speaks", "someone is typing on a computer keyboard"], "sample_ids": ["zj2R0XoFr5k", "v0x1odnXtP0"], "start_seconds": ["50", "210"], "properties": ["airplane, fly, woman", "keyboard, type, computer"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a person is typing on a keyboard"], "question": "which is not a person", "label": 0}, {"captions": ["a vehicle engine runs while a siren and horn sound", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["u--KhUW8l1Y", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["engine, sound, horn", "rooster, crow, background, men"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["wind blows as people chatter quietly", "small dogs yip and bark sharply"], "sample_ids": ["xBxDz0CFVn0", "v-wcQf4BDY0"], 
"start_seconds": ["30", "120"], "properties": ["wind, chatter, people", "bark, yip, sharply"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "birds chirp and objects are moved around"], "sample_ids": ["uqFtmnhuqA8", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["a, b, c", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "insects buzz and a man speaks"], "question": "which entity is more like a bird chirping?", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "small dogs yip and bark sharply"], "sample_ids": ["y2ZBGpgbhHM", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["dog, chirp, breathe", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["birds chirping and a dog panting", "a dog barks and growls"], "question": "which dog is more active", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "gunshots ring out, a man yells, and more shots follow"], "sample_ids": ["yaln9y8I7ms", "vKrYfzleLB8"], "start_seconds": ["230", "110"], "properties": ["female, flushes, toilet", "a, ring, gunshots"], "captions_pred_video": ["footage is blurry and out of focus", "stock footage of a person holding a gun in their hand"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a man is speaking with background noise and 
a cap gun is fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "a woman speaks happily and an animal chirps"], "sample_ids": ["uKCSGgof8gI", "uWAAAL4CIoc"], "start_seconds": ["12", "0"], "properties": ["chirps, distance, signal", "a woman, chirps, animal"], "captions_pred_video": ["footage of a street in a small town on a sunny day", null], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "water flows as men speak and yell"], "sample_ids": ["y8WEcpOlT3I", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["harsh, wind, blows", "water, flow, men"], "captions_pred_video": ["on how to use a sewing machine youtube", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "a propeller rotates loudly and intensely"], "sample_ids": ["uiS58TNyUiw", "ugHJF0hfYkg"], "start_seconds": ["430", "10"], "properties": ["audio, man, speaking", "loud, intense, propeller"], "captions_pred_video": ["of the pigeon in the cage", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["multiple ducks quack continuously", "a clock ticktocks"], "sample_ids": ["wfHeoPDLMaM", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["multiple, quack, continuously", "ticktocks, clock, ticktocks"], "captions_pred_video": ["ducks in a barn with a 
fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["ducks are quacking", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "someone is typing on a computer keyboard"], "sample_ids": ["slZLHwNbbt4", "v0x1odnXtP0"], "start_seconds": ["300", "210"], "properties": ["clap, distance, horn", "keyboard, type, computer"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "how to make money on youtube in spanish"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["someone snores nearby", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["spJCm8tD9Zo", "uYT5gxnyMWM"], "start_seconds": ["90", "50"], "properties": ["someone snores, nearby, someone", "female, spraying, scream"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a person spraying paint on 
the ceiling"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["several ducks are quacking and squawking", "water splashes and a door squeaks"], "sample_ids": ["wfHeoPDLMaM", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["quacking, squawking, ducks", "sound, splash, door"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "a dog barks and taps with background noise "], "question": "which entity is silent", "label": 1}, {"captions": ["scraping and female speech with distant music", "ticking continues without interruption"], "sample_ids": ["yHeVV-xeOxQ", "v-g-j2uTByM"], "start_seconds": ["130", "30"], "properties": ["female, speech, music", "ticking, continuous, clock"], "captions_pred_video": ["of a girl milking a goat's udder", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a clock is ticking loudly"], "question": "which entity is a clock", "label": 1}, {"captions": ["an 
electronic device bleeps once", "a child speaks in closed space"], "sample_ids": ["tHJ6JSa8Y4", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["bleeps, electronic, device", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a clock is ticking and beeping", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "water pouring and bubbling"], "sample_ids": ["siJFXfGWgDk", "uyRfq-jKPpo"], "start_seconds": ["50", "50"], "properties": ["a, bird, vehicle", "water, bubbles, pouring"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "a man speaks as a motor runs in the background"], "sample_ids": ["w9lpbUn0hPc", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["male, wind, rustling", 
"background, motor, run"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["an engine runs and wind blows", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vs65y4qmyBE", "su6FAOcOA8c"], "start_seconds": ["340", "4"], "properties": ["engine, run, wind", "engine, idle, woman"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a woman speaks as she rubs two objects together"], "sample_ids": ["wRBHTgrbiwg", "vzxHnu-SFEw"], "start_seconds": ["50", "80"], "properties": ["bird, owl, speak", "two objects, woman, speak"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is about a woman speaking?", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "water splashes as an animal walks through"], "sample_ids": ["v5P-ThUCINM", "w1ir-sZ3Im8"], "start_seconds": ["400", "90"], "properties": ["background, chirp, bird", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and birds are chirping", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a person speaks over rustling leaves", "some people speak"], "sample_ids": ["zOZleIRqZm4", "vbZ-0lGPneg"], "start_seconds": ["80", "30"], "properties": ["rustling, leaves, person", "some people speak English, some people speak Spanish, some people speak French"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a dog is whimpering"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["water runs into a sink while men speak", "people speak as gunfire rings out"], "sample_ids": ["vzceMbklWc", "wqTCwqVRDlk"], "start_seconds": ["180", "80"], "properties": ["water, sink, run", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["water is running and a man is speaking", "a man is speaking and a 
gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "a loud engine muffles a man as he speaks"], "sample_ids": ["vlS6YMeWAPo", "xyx6eNVEYRY"], "start_seconds": ["40", "380"], "properties": ["sheep, baa, birds", "loud, engine, muffles"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "footage of a helicopter landing on a runway at an airport"], "captions_pred_audio": ["a goat bleats and birds chirp", "an aircraft engine is running and a man is speaking "], "question": "which entity is muffled", "label": 1}, {"captions": ["a jet engine spools up and takes off", "children speak and play together"], "sample_ids": ["vBslzh7saPw", "yVVP8XvWJTo"], "start_seconds": ["90", "260"], "properties": ["engine, spools, takes", "children, speak, play"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of a playground at a school or daycare center"], "captions_pred_audio": ["a jet engine roars and accelerates ", "children are speaking and breathing with background noise "], "question": "which entity is more likely to be in motion", "label": 0}, {"captions": ["birds chirp and objects are moved around", "wind blowing followed by a zoom"], "sample_ids": ["yPUYU6t3rwo", "vr8ZXjEBhMQ"], "start_seconds": ["370", "150"], "properties": ["birds chirp, objects are moved around, birds", "wind, blow, zoom"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["insects buzz and a man speaks", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to be a natural phenomenon", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["wEBlkGWVWwE", "xZepNM9qcRA"], "start_seconds": ["260", "30"], 
"properties": ["a, babble, woman", "background, motor, run"], "captions_pred_video": ["shows a person writing on the whiteboard", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["vs65y4qmyBE", "xjhAnI2q6hM"], "start_seconds": ["340", "6"], "properties": ["engine, run, man", "engine revs, vehicle, people"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a truck is revving its engine and a man is speaking "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a male speaks and another male speaks", "a door opens and closes"], "sample_ids": ["viuTg1M-dqg", "vBHyYJ8pL0"], "start_seconds": ["30", "2"], "properties": ["two males, speaking, male", "open, close, door"], "captions_pred_video": ["footage of water coming out of a hole in the ground", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is a door?", "label": 1}, {"captions": ["a man talks while vehicles pass by", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sK4u5T8hW78", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "rooster, crow, background, men"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people in it", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "an engine runs loudly"], "sample_ids": ["wqZ135Ssz0", "vqZuVbG6-HI"], "start_seconds": ["60", "130"], "properties": ["man, woman, squawks", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["a church bell rings several times", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sUVVjE3Ucp8", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["ring, bell, several", "three men, wind, flow"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a church bell is ringing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a bell ringing?", "label": 0}, {"captions": ["a woman speaks and other women and a man talk with her", "a propeller rotates loudly and intensely"], "sample_ids": ["vbpKkWvfOu4", "ugHJF0hfYkg"], "start_seconds": ["560", "10"], "properties": ["a, woman, man", "loud, intense, propeller"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a vehicle engine accelerating then running on idle"], "sample_ids": ["weDbePuc-Xc", "vYkA3cfXp5Q"], "start_seconds": ["40", "30"], "properties": ["music, slaps, human", "engine, accelerate, idle"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["t25U-v4k4ts", "w5W5Kqtc8E"], "start_seconds": ["40", "100"], "properties": ["a, chirps, bird", "wind, blow, vehicle"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", null], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running?", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "several insects fly while two men talk"], "sample_ids": ["sapQIQUhFc", "s-T9OVOiMLo"], "start_seconds": ["280", "330"], "properties": ["liquid, flow, distance", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man 
is speaking and a stream is flowing in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more moving parts", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a clock ticktocks"], "sample_ids": ["zofjfKhqLk8", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["noise, stop, motor", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["vehicles pass by on a roadway", "water pouring and bubbling"], "sample_ids": ["tgbONvsP47Y", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["pass, vehicle, roadway", "water, bubbles, pouring"], "captions_pred_video": ["footage of a fire truck entering a garage", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a car is driving on the road ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "paper is crumpling 
consistently"], "sample_ids": ["ziUT9IFTkjg", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["background, birds, rustling", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "some men converse over an engine running"], "sample_ids": ["wSVhSdj0F0", "sCiy7QS1U"], "start_seconds": ["10", "300"], "properties": ["beep, clang, footsteps", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["sjlVMgdGSK0", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["car, revving, loudly", "clickety-clack, train, whistle"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a train blows its whistle and blows its horn "], "question": "which is quieter", "label": 0}, {"captions": ["a man rubs two objects together then speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vveS8HT7Uog", "wz7N8YRy74I"], "start_seconds": ["100", "30"], "properties": ["a man, objects, speak", "rooster, crow, background, men"], "captions_pred_video": ["footage is of a workbench with various 
tools on it including a hammer and a screwdriver", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in it?", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "someone whistles a tune"], "sample_ids": ["se87d6yxEOA", "sIXTftIuUgw"], "start_seconds": ["10", "90"], "properties": ["run, whistle, pass", "someone, tune, whistle"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", null], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a person whistling a song"], "question": "which entity is a human", "label": 1}, {"captions": ["a baby cries and a woman moans", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["smDKStoHBJo", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["a, cry, woman", "music, gunfire, explosion"], "captions_pred_video": ["a man holding a crying baby in his arms", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["spJCm8tD9Zo", "uYT5gxnyMWM"], "start_seconds": ["90", "50"], "properties": ["snores, wheezes, sleeps", "female, spraying, scream"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 0}, 
{"captions": ["winds blows roughly as a vehicle races past", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["xjvTpk2Zpr8", "tiDFTC-5vU"], "start_seconds": ["70", "30"], "properties": ["wind, blows, vehicle", "male, duck, laugh"], "captions_pred_video": ["footage of a dhl plane landing on the runway", null], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a motorcycle engine is idling", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vZAqdHZ81yA", "uEU-Hg5MTN8"], "start_seconds": ["180", "27"], "properties": ["engine, motorcycle, idling", "animal, grunts, snorts"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["an engine is idling loudly", "a woman is speaking and a baby is crying"], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vzxHnu-SFEw", "uZesmtKZGSw"], "start_seconds": ["80", "250"], "properties": ["two objects, woman, speak", "men, talk, cars"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yRx9txMcBl0", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["accelerates, tires, squeals", "female, spraying, scream"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage of a person spraying paint on the ceiling"], 
"captions_pred_audio": ["a car is revving its engine and skidding ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["water rushes by", "water flows and trickles"], "sample_ids": ["x-PeY8Yb8M4", "tB7hWb9gTuQ"], "start_seconds": ["300", "30"], "properties": ["water, rushes, by", "water, flow, trickle"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a car is driving on a wet road ", "water is splashing and gurgling"], "question": "which entity is more calm", "label": 1}, {"captions": ["water flows and trickles", "people speak as gunfire rings out"], "sample_ids": ["tB7hWb9gTuQ", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["water, flow, trickle", "gunfire, ring, speak"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["water is splashing and gurgling", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["xl2PIWyXaM", "xZepNM9qcRA"], "start_seconds": ["160", "30"], "properties": ["chirp, man, younger person", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["birds are chirping and people are talking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["wind noise makes sound into a microphone", "winds blows roughly as a vehicle races past"], "sample_ids": ["w8uLijTqtlU", "xjvTpk2Zpr8"], "start_seconds": ["70", "70"], 
"properties": ["wind, microphone, noise", "wind, blows, vehicle"], "captions_pred_video": ["footage is blurry and shaky", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["the wind is blowing strongly", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "an airplane engine runs"], "sample_ids": ["vveS8HT7Uog", "yVPZ2MNWpms"], "start_seconds": ["100", "0"], "properties": ["a man, objects, speak", "engine, airplane, runs"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a car is driving by on the road "], "question": "which object is moving", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a horn rings out as a machine runs by"], "sample_ids": ["zFjIWfSD-4", "slZLHwNbbt4"], "start_seconds": ["410", "300"], "properties": ["People, motor, brakes", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["birds chirp and wind blows", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sxIvBMSavMQ", "xKB8O8LTs6s"], "start_seconds": ["210", "70"], "properties": ["birds, chirp, wind", "music, gunfire, explosion"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive 
beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "music plays, a person speaks, followed by whooshes and a ding"], "sample_ids": ["w5W5Kqtc8E", "tQWGZLItBXk"], "start_seconds": ["100", "170"], "properties": ["wind, engine, scream", "music, person, ding"], "captions_pred_video": [null, "worms revolution screenshots"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity is a musical composition?", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["w0xsN8X18Y", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["music, surface, rain", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more likely to be a natural occurrence", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "an insect buzzes 
around continuously"], "sample_ids": ["x5cuQjOdM3E", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["cat, meows, young woman", "buzzes, continuously, insect"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a cat meows and a woman speaks", "a fly is buzzing around a microphone "], "question": "which entity is more active", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vlS6YMeWAPo", "sSMl2vc3ek"], "start_seconds": ["40", "20"], "properties": ["noise, bleat, call", "loud, multiple, distance"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", null], "captions_pred_audio": ["a goat bleats and birds chirp", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wqZ135Ssz0", "sSMl2vc3ek"], "start_seconds": ["60", "20"], "properties": ["man, woman, squawks", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a clock ticktocks continuously", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vlJS7LN2XyM", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["ticktocks, clock, ticktocks continuously", "wind, blow, vehicle"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "a motorboat is moving and people are 
shouting and cheering "], "question": "which entity is not a clock?", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["uWAAAL4CIoc", "vfYTJq7nU"], "start_seconds": ["0", "130"], "properties": ["a woman, chirps, animal", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a duck quacks and a woman speaks"], "question": "which entity has more animals", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["xl2PIWyXaM", "rqu8iB22IY"], "start_seconds": ["160", "5"], "properties": ["chirp, man, younger person", "sound, repeats, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and people are talking", "a dog barks and a man speaks while music plays "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "a woman speaks happily and an animal chirps"], "sample_ids": ["vs65y4qmyBE", "uWAAAL4CIoc"], "start_seconds": ["340", "0"], "properties": ["engine, run, man", "a woman, chirps, animal"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking and a dog is barking "], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["an electric engine works nearby followed by a child talking", "dishes cling together then a man begins to speak"], "sample_ids": ["xSKJGCItUWE", "sQGXqGcwOTc"], "start_seconds": ["10", "3"], "properties": ["engine, work, child", "cling, speak, dishes"], "captions_pred_video": ["footage of the helicopter flying in the room", "a man washing dishes 
in a commercial kitchen"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["children cry and people talk", "an airplane flies overhead as a woman speaks"], "sample_ids": ["xLwHe825Zs", "zj2R0XoFr5k"], "start_seconds": ["18", "50"], "properties": ["people talk, children cry, people talk", "airplane, fly, overhead"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a baby cries and a woman speaks", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a plane flying overhead?", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "people speak as gunfire rings out"], "sample_ids": ["y8dSeubCNI", "wqTCwqVRDlk"], "start_seconds": ["4", "80"], "properties": ["engine revving, people speaking, motorcycle", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["an engine revving and people talking in the background", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["ukg5L09Wpvo", "vfYTJq7nU"], "start_seconds": ["150", "130"], "properties": ["clickety-clack, train, whistle", "rustling, ducks, quack"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", null], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a duck quacks and a woman speaks"], "question": "which entity is more quiet", 
"label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sjlVMgdGSK0", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["car, revving, loudly", "a woman, something, fried"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tw76HGONaKg", "vfYTJq7nU"], "start_seconds": ["570", "130"], "properties": ["A, game, keyboard", "rustling, ducks, quack"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a duck quacks and a woman speaks"], "question": "which entity is about a game?", "label": 
0}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a woman speaks as frying food sizzles"], "sample_ids": ["sEprKHm8Sj8", "wTideSjRFS0"], "start_seconds": ["90", "30"], "properties": ["car, tires, slows", "food, sizzle, woman"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking while water is running in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "wind blows as people chatter quietly"], "sample_ids": ["uiS58TNyUiw", "xBxDz0CFVn0"], "start_seconds": ["430", "30"], "properties": ["vocalize, bird, chirp", "wind, chatter, people"], "captions_pred_video": ["of the pigeon in the cage", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["bees buzz as wind blows", "a female speaks softly as paper crinkles"], "sample_ids": ["tMJne1a4AFI", "xvDdE3zNf8Y"], "start_seconds": ["0", "120"], "properties": ["bees, buzz, wind", "a, female, speaks"], "captions_pred_video": ["a swarm of bees on the ground", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a swarm of bees buzzing around", "a woman speaks and crumples paper"], "question": "which entity is speaking softly", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["uzQnlJXBbOM", "uEU-Hg5MTN8"], 
"start_seconds": ["50", "27"], "properties": ["ringing, beep, stop", "a woman, laughs, animal"], "captions_pred_video": ["footage of a person using a cell phone on a table", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a telephone rings and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["an infant crying as a woman laughs", "a woman speaks and other women and a man talk with her"], "sample_ids": ["xhmRY9yhC7c", "vbpKkWvfOu4"], "start_seconds": ["20", "560"], "properties": ["a, laugh, infant", "a, woman, man"], "captions_pred_video": ["of a baby crying in a baby bouncer", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a baby cries and a woman speaks", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["a man speaks as crickets sing", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["ryFDPxgDOGc", "wDVMhEdTiVw"], "start_seconds": ["570", "30"], "properties": ["a, crickets, sing", "gun, shoot, water"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a child yells and another yells", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vMDHu7Lxcgw", "wqZ135Ssz0"], "start_seconds": ["410", "60"], "properties": ["two, yell, child", "two men, woman, birds"], 
"captions_pred_video": ["a boy playing on a trampoline in the backyard", null], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["two frogs croak at each other", "a man speaks as a car is passing by"], "sample_ids": ["zg0X6BnhOLQ", "sK4u5T8hW78"], "start_seconds": ["410", "30"], "properties": ["two frogs, croak, at each other", "a, car, pass"], "captions_pred_video": ["footage of lightning in the sky at night", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a frog is croaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a woman speaks happily and an animal chirps"], "sample_ids": ["wyllXV6PjKo", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["a kid, talk, cry", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman is speaking and a dog is barking "], "question": "which entity has a kid?", "label": 0}, {"captions": ["a man speaks then multiple motorcycles pass by", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["zcDwZ6W7E3E", "uZesmtKZGSw"], "start_seconds": ["180", "250"], "properties": ["a, man, speak", "men, talk, cars"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more vehicles", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "dishes cling together then a man begins to speak"], "sample_ids": ["vZAw4apG0Es", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["people, clock, converse", "cling, speak, dishes"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a clock is ticking and people are talking", "mechanisms are operating and water is splashing "], "question": "which entity is about a clock?", "label": 0}, {"captions": ["two men speak as a buffeting wind blows", "three men talk while wind blows and some liquid flows"], "sample_ids": ["y8WEcpOlT3I", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["wind, speak, buffeting", "three men, wind, flow"], "captions_pred_video": ["on how to use a sewing machine youtube", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people", "label": 1}, {"captions": ["a person whistles a meandering tune", "water 
splashes as an animal walks through"], "sample_ids": ["uFoga8sHpiw", "w1ir-sZ3Im8"], "start_seconds": ["90", "90"], "properties": ["person, tune, whistle", "animal, water, splashes"], "captions_pred_video": ["footage of a bird in a cage", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a person whistles a song", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "water is sprayed across a hard surface"], "sample_ids": ["w2JXXIAdUdg", "sQwlkXjQabo"], "start_seconds": ["10", "10"], "properties": ["snoring, distance, person", "water, spray, surface"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a person snoring and a dog whimpering", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["uYT5gxnyMWM", "w34HjHr6gAY"], "start_seconds": ["50", "30"], "properties": ["female, spraying, scream", "beeps, hit, woman"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a beep sounds 
followed by a child speaking"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "water flows as men speak and yell"], "sample_ids": ["tw76HGONaKg", "vJ7JPEFhyLA"], "start_seconds": ["570", "16"], "properties": ["A, game, keyboard", "water, flow, men"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking while playing a video game on a keyboard?", "label": 0}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "a man speaks as a motor runs in the background"], "sample_ids": ["wvKpEYswXO0", "xZepNM9qcRA"], "start_seconds": ["150", "30"], "properties": ["water, tap, run", "background, motor, run"], "captions_pred_video": ["of the person preparing food in the kitchen", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and 
water running ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "roadway noise occurs and a truck accelerates"], "sample_ids": ["sSMl2vc3ek", "tgbONvsP47Y"], "start_seconds": ["20", "0"], "properties": ["loud, multiple, distance", "noise, truck, accelerate"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a person snoring loudly", "a car is driving on the road "], "question": "which noise is louder", "label": 1}, {"captions": ["some tunes played by whistling", "bees buzz as wind blows"], "sample_ids": ["u6BnG6YZqJ4", "tMJne1a4AFI"], "start_seconds": ["0", "0"], "properties": ["tune, play, whistling", "bees, buzz, wind"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "a swarm of bees on the ground"], "captions_pred_audio": ["a person whistling a song", "a swarm of bees buzzing around"], "question": "which entity is not a musical instrument", "label": 1}, {"captions": ["food is frying and sizzles", "some men converse over an engine running"], "sample_ids": ["zNRChLjqcU", "sCiy7QS1U"], "start_seconds": ["220", "300"], "properties": ["food is frying, sizzles, food", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running from a faucet into a sink", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "water rushes by"], "sample_ids": ["s7knHCFW82w", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["blow horn, get close, train", "water, rushes, by"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "a man sitting on a rock in the middle of a 
river"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a car is driving on a wet road "], "question": "which is a moving object", "label": 0}, {"captions": ["some tunes played by whistling", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["u6BnG6YZqJ4", "t25U-v4k4ts"], "start_seconds": ["0", "40"], "properties": ["tune, play, whistling", "a, chirps, bird"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a person whistling a song", "a man is speaking and bees are buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["yajyRTUQk3U", "wDVMhEdTiVw"], "start_seconds": ["400", "30"], "properties": ["a woman, something, fried", "gun, shoot, water"], "captions_pred_video": ["- a woman cooking in the kitchen", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vbr9mHKc8WM", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["noise, loudness, engine", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["an engine is idling", "a woman is speaking and a baby is crying"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man laughs and speaks as cats purr and hiss", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vVhthZ45k3Y", "vJ7JPEFhyLA"], 
"start_seconds": ["30", "16"], "properties": ["cat, purr, hiss", "three men, wind, flow"], "captions_pred_video": ["footage is blurry and out of focus", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "a toilet door squeaks as it is opened"], "sample_ids": ["ukxt9I7eMMg", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["food, pan, cook", "door, toilet, squeaks"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a dog barks and taps with background noise "], "question": "which door is squeaking", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["ziUT9IFTkjg", "tDlysoZiA1I"], "start_seconds": ["10", "0"], "properties": ["background, birds, rustling", "animal, grunts, chirps"], "captions_pred_video": [null, "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "birds are chirping and a rooster is crowing "], "question": "which entity has more birds", "label": 1}, {"captions": ["a person speaks over rustling leaves", "an insect buzzes around continuously"], "sample_ids": ["zOZleIRqZm4", "v25l1jef3JY"], "start_seconds": ["80", "0"], "properties": ["rustling, leaves, person", "buzzes, continuously, insect"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a fly is buzzing around a 
microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["tIY7qOV3rEM", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "engine, idle, woman"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a woman is speaking and a subway train is moving "], "question": "which entity is a human activity", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "people applaud and hoot and chat quietly"], "sample_ids": ["w9lpbUn0hPc", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["male, wind, rustling", "people, applaud, hoot"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", null], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be in a theater", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "a crowd yells, reacts and applauds"], "sample_ids": ["s4Uz1Ffgo04", "wztCSUxOf8"], "start_seconds": ["100", "130"], "properties": ["water, rushes, motorcycle", "a crowd, yells, applauds"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking and a crowd is clapping"], "question": "which entity is more likely to be a video of a person riding a motorcycle?", "label": 0}, {"captions": ["water is sprayed across a hard 
surface", "water splashes and a door squeaks"], "sample_ids": ["sQwlkXjQabo", "sdXV-ylviw"], "start_seconds": ["10", "190"], "properties": ["water, spray, surface", "sound, splash, door"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", null], "captions_pred_audio": ["spraying followed by silence", "a dog barks and taps with background noise "], "question": "which entity is sprayed across a hard surface", "label": 0}, {"captions": ["someone sprays liquid onto a hard surface", "waves crash against a shoreline and people speak"], "sample_ids": ["sQwlkXjQabo", "yFB25fqfU8I"], "start_seconds": ["10", "300"], "properties": ["liquid, surface, spray", "wave, crash, shoreline"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage of a person surfing in the ocean"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be seen in a movie", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a telephone rings followed by a woman talking"], "sample_ids": ["vddP56-ogds", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["water, flow, laugh", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["leaves rustle while man speaks", "wind blowing followed by a zoom"], "sample_ids": ["zOZleIRqZm4", "vr8ZXjEBhMQ"], "start_seconds": ["80", "150"], "properties": ["leaves, rustle, speak", "wind, blow, zoom"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "wind blows and a chainsaw cuts 
through wood "], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["a jet engine roars, almost making a man inaudible", "an electric engine works nearby followed by a child talking"], "sample_ids": ["xfaoyyzw2WU", "xSKJGCItUWE"], "start_seconds": ["180", "10"], "properties": ["loud, jet engine, roar", "engine, work, child"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "footage of the helicopter flying in the room"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a high pitched engine is running and a child speaks"], "question": "which engine is quieter", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "water drips and bubbles as a man speaks"], "sample_ids": ["y8WEcpOlT3I", "vSeGhaZt-aI"], "start_seconds": ["40", "50"], "properties": ["wind, speak, buffeting", "water, bubbles, speak"], "captions_pred_video": ["on how to use a sewing machine youtube", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is more calm", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["ugHJF0hfYkg", "yDoT73BWsdA"], "start_seconds": ["10", "10"], "properties": ["engine, running, continuously", "engine, revs, vehicle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a helicopter is flying overhead ", "a race car accelerates and revs its engine "], "question": "which entity has a running engine", "label": 0}, {"captions": ["water pouring and bubbling", "paper folding and crinkling"], "sample_ids": ["uyRfq-jKPpo", "zPpG3RD8lSs"], "start_seconds": ["50", "20"], "properties": ["water, bubbles, pouring", 
"paper, fold, crinkle"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["water is running from a faucet", "the wind blows and a mouse clicks "], "question": "which entity is not a liquid", "label": 1}, {"captions": ["a man speaks as horns blow", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tHyNqRyK34A", "w5W5Kqtc8E"], "start_seconds": ["24", "100"], "properties": ["a, man, speaks", "wind, blow, 
vehicle"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", null], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["birds chirp and a pop occurs before a man speaks", "several insects fly while two men talk"], "sample_ids": ["zuua6-5goWw", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["sound, pop, bird", "several, fly, men"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more flying insects", "label": 1}, {"captions": ["an engine runs and wind blows", "people speak then an engine runs"], "sample_ids": ["vs65y4qmyBE", "uMTTDZ2mb4"], "start_seconds": ["340", "30"], "properties": ["engine, run, wind", "engine, run, people"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "people are talking and a car is driving by with wind noise in the background "], "question": "which entity 
has more people", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "a woman speaks as she rubs two objects together"], "sample_ids": ["zgUgkpk78xU", "vzxHnu-SFEw"], "start_seconds": ["70", "80"], "properties": ["horn, bells, ring", "two objects, woman, speak"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a warning device?", "label": 0}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a machine clanks and thumps and a male speaks"], "sample_ids": ["sU53zg9Jp7s", "sWZzXuWYY"], "start_seconds": ["380", "420"], "properties": ["a bird 
chirps, a door bell ringing, a woman gasps", "male, clanks, thumps"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", null], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a sewing machine runs and a man speaks"], "question": "which entity has a male speaking?", "label": 1}, {"captions": ["children cry and people talk", "a horn honks and then loudly blares"], "sample_ids": ["xLwHe825Zs", "wnpJndXuxLc"], "start_seconds": ["18", "50"], "properties": ["people talk, children cry, people talk", "horn, honk, loud"], "captions_pred_video": [null, "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a baby cries and a woman speaks", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xl2PIWyXaM", "yajyRTUQk3U"], "start_seconds": ["160", "400"], "properties": ["chirp, man, younger person", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds are chirping and people are talking", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["water bubbles and gurgles.", "waves crash against a shoreline and people speak"], "sample_ids": ["tB7hWb9gTuQ", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["bubbles, gurgles, water", "wave, crash, shoreline"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "footage of a person surfing in the ocean"], "captions_pred_audio": ["water is splashing and gurgling", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity 
is more calm", "label": 0}, {"captions": ["a horn blasts as warning bells ring", "small dogs yip and bark sharply"], "sample_ids": ["zgUgkpk78xU", "v-wcQf4BDY0"], "start_seconds": ["70", "120"], "properties": ["horn, bells, ring", "bark, yip, sharply"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "a car accelerates and wind blows"], "sample_ids": ["tw76HGONaKg", "u0TrcHhkPQ"], "start_seconds": ["570", "20"], "properties": ["music, click, man", "accelerates, wind, blows"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on 
a computer keyboard ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["people clap and speak in the distance", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["wwyfGO2J4", "tDlysoZiA1I"], "start_seconds": ["90", "0"], "properties": ["clap, distance, speak", "animal, grunts, chirps"], "captions_pred_video": [null, "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "birds are chirping and a rooster is crowing "], "question": "which entity is more animal like", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "a duck quacks continuously"], "sample_ids": ["s59PfAghdkM", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["bird, chirp, background, horse, neigh", "quacks, continuously, duck"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 0}, {"captions": ["a toilet flushes and water sputters as it drains", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["smGI3C1NZc", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["water, drain, toilet", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is flushed", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a person whistles a meandering tune", "a person speaks over rustling leaves"], "sample_ids": ["uFoga8sHpiw", "zOZleIRqZm4"], "start_seconds": ["90", "80"], "properties": ["person, tune, whistle", "rustling, 
leaves, person"], "captions_pred_video": ["footage of a bird in a cage", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a person whistles a song", "a man is speaking with crickets chirping in the background"], "question": "which person is speaking over rustling leaves", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "water pouring and bubbling"], "sample_ids": ["uqFtmnhuqA8", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["a, b, c", "water, bubbles, pouring"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "material crumbles into a microphone"], "sample_ids": ["zY3icUyMdh8", "vofpvUo6NAw"], "start_seconds": ["20", "220"], "properties": ["dog, bark, engine", "material, crumbles, microphone"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "person wrapping a toy car in a plastic bag"], 
"captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "paper is being crumpled and crinkled"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "water is sprayed across a hard surface"], "sample_ids": ["ujMt0-D-x2k", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["snoring, rhythmical, nearby", "water, spray, surface"], "captions_pred_video": ["of the dog playing with a toy on the floor", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a person is snoring loudly", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an audience gives applause", "a man speaks as a motor runs in the background"], "sample_ids": ["x6iCUDmRpKQ", "xZepNM9qcRA"], "start_seconds": ["38", "30"], "properties": ["applause, audience, give", "background, motor, run"], "captions_pred_video": ["a black background with the moon and stars in the sky", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a group of people are clapping and cheering", "a man speaks while a motorcycle revs and accelerates "], "question": "which is a man speaking", "label": 1}, {"captions": ["an engine runs loudly", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vqZuVbG6-HI", "vfYTJq7nU"], "start_seconds": ["130", "130"], "properties": ["loud, engine, run", "rustling, ducks, quack"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a duck quacks and a woman speaks"], "question": "which entity is quieter", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "water flows as men speak and yell"], "sample_ids": ["xfaoyyzw2WU", "vJ7JPEFhyLA"], "start_seconds": 
["180", "16"], "properties": ["loud, jet engine, roar", "water, flow, men"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which is louder", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["ugHJF0hfYkg", "zl9Dqx-j7q4"], "start_seconds": ["10", "6"], "properties": ["loud, propeller, move", "engine, laugh, loud"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a helicopter is flying overhead ", "a jet engine roars "], "question": "which is louder", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["uzQnlJXBbOM", "wz7N8YRy74I"], "start_seconds": ["50", "30"], "properties": ["ringing, beep, stop", "rooster, crow, background, men"], "captions_pred_video": ["footage of a person using a cell phone on a table", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a telephone rings and a man speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "a vehicle engine accelerates and wind blows"], "sample_ids": ["xNMovAf3o50", "wudZTNBtVqc"], "start_seconds": ["0", "60"], "properties": ["rain, thunder, music", "accelerates, engine, wind"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "footage is of a parking lot with cars parked in it"], "captions_pred_audio": ["thunder and rain with 
music playing in the background ", "a car accelerates and revs its engine "], "question": "which entity is not a weather phenomenon", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "a man speaks then blows a vehicle horn as wind blows"], "sample_ids": ["s4Uz1Ffgo04", "zALy31PjDl0"], "start_seconds": ["100", "21"], "properties": ["water, rushes, motorcycle", "a man, a vehicle, a horn"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a motorcycle is parked on the side of a brick walkway"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking and a car horn is honking"], "question": "which entity is about a vehicle?", "label": 0}, {"captions": ["wind blows and people talk while livestock vocalizes", "a frog croaks as other frogs croak in the background"], "sample_ids": ["vXlk0lIQBFo", "yswmmRZFItk"], "start_seconds": ["470", "0"], "properties": ["wind, talk, vocalize", "background, frog, croak"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a close up of a frog in the water"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a frog is croaking"], "question": "which entity is a single animal", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "birds chirp and objects are moved around"], "sample_ids": ["ul60S8TXDA8", "yPUYU6t3rwo"], "start_seconds": ["60", "370"], "properties": ["sound, distance, bell", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a pigeon cooing as an insect 
buzzes by briefly", "an infant crying as a woman laughs"], "sample_ids": ["yZrFNS7GFBQ", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["pigeon, buzzes, insect", "a, laugh, infant"], "captions_pred_video": ["of the bird in the cage", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["an owl hoots in the background ", "a baby cries and a woman speaks"], "question": "which entity is crying", "label": 1}, {"captions": ["a clock ticktocks briefly", "a boat travels through the waves as the wind blows loudly and a man speaks over a radio"], "sample_ids": ["u7C-AEBQM", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["ticktocks, clock, ticktocks briefly", "wind, radio, waves"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["wind noise makes sound into a microphone", "a stream of water runs briefly"], "sample_ids": ["w8uLijTqtlU", "x-PeY8Yb8M4"], "start_seconds": ["70", "300"], "properties": ["wind, microphone, noise", "stream, water, run"], "captions_pred_video": ["footage is blurry and shaky", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["the wind is blowing strongly", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "a music is played followed by a frog croaking and then music is played again"], "sample_ids": ["tw76HGONaKg", "voJh2gJxXhA"], "start_seconds": ["570", "50"], "properties": ["A, game, keyboard", "music, frog, croak"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of 
zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "a frog on a black background with a red diamond in the center"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "music is playing and crickets are chirping "], "question": "which entity is not a video game", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["wqZ135Ssz0", "vbZ-0lGPneg"], "start_seconds": ["60", "30"], "properties": ["man, woman, squawks", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["wz7N8YRy74I", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["rooster, crow, background, people", "harsh, wind, blows"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is 
crowing ", "a man is speaking with wind noise in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["tDlysoZiA1I", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["animal, grunt, multiple", "engine, idle, woman"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking and a subway train is moving "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "someone is typing on a computer keyboard"], "sample_ids": ["uWAAAL4CIoc", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["a woman, chirps, animal", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a fly buzzes around loudly as birds chirp"], "sample_ids": ["ylpYOorfH4o", "uJV8NDaHqqk"], "start_seconds": ["410", "100"], "properties": ["motor, run, steady", "loud, fly, chirp"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 
1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a bee hive in a wooden box"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a swarm of bees buzzing around"], "question": "which entity is louder", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "a man speaks followed by another man speaking outside"], "sample_ids": ["zsLxS-uLJTw", "viuTg1M-dqg"], "start_seconds": ["20", "30"], "properties": ["horn, blast, train", "two men, speak, follow"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a single speaker?", "label": 0}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "water flows and trickles"], "sample_ids": ["yYEVLuqEytU", "tB7hWb9gTuQ"], "start_seconds": ["40", "30"], "properties": ["animal, pig, background", "water, flow, trickle"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["several sheep bleat and a man speaks", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "an adult male speaks and dials a rotary phone"], "sample_ids": ["uPDn2BFTHk", "tK4VlLsNxak"], "start_seconds": ["140", "120"], "properties": ["woman, laughs, speaks", "An adult male speaks, dials, and speaks into a rotary phone"], "captions_pred_video": [null, "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking 
and using a sewing machine"], "question": "which entity is a person", "label": 1}, {"captions": ["food is frying then a woman speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["ukxt9I7eMMg", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["food, woman, speak", "engine, laugh, loud"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "a man speaks followed by another man speaking outside"], "sample_ids": ["se87d6yxEOA", "viuTg1M-dqg"], "start_seconds": ["10", "30"], "properties": ["run, whistle, pass", "two men, speak, follow"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["someone whistles a tune", "people speak in the background as a clock ticktocks"], "sample_ids": ["sIXTftIuUgw", "vZAw4apG0Es"], "start_seconds": ["90", "30"], "properties": ["someone, tune, whistle", "background, clock, ticktocks"], "captions_pred_video": [null, "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a person whistling a song", "a clock is ticking and people are talking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a male speaks over some small clicks", "pigeons vocalize and birds chirp"], "sample_ids": ["uXxVebHsGZ8", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["male, clicks, 
speak", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "people applaud and hoot and chat quietly"], "sample_ids": ["wqUmIEzuNz4", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["frog, bird, vocalize", "people, applaud, hoot"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", null], "captions_pred_audio": ["a cat meows and rustles", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "a child speaks in closed space"], "sample_ids": ["tK4VlLsNxak", "yW6FWLSLkx4"], "start_seconds": ["120", "40"], "properties": ["a, dial, telephone", "child, space, speak"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "a soft wind underscores a woman laughing"], "sample_ids": ["x9JovgqUcs", "s6DESzUTGjY"], "start_seconds": ["500", "16"], "properties": ["a, man, speaks, keyboard", "wind, laugh, woman"], "captions_pred_video": [null, "how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up 
an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a motorboat is moving with wind noise in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a toilet flushes and a female speaks"], "sample_ids": ["xC8kbrKJmco", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["background, goat, scream", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a goat is bleating ", "a toilet flushes and a man speaks"], "question": "which entity is silent", "label": 1}, {"captions": ["speaking following by laughing and clapping", "multiple birds chirp and an animal grunts"], "sample_ids": ["u2f5NpsoHBg", "tDlysoZiA1I"], "start_seconds": ["30", "0"], "properties": ["person, laugh, clap", "animal, grunt, multiple"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "birds are chirping and a rooster is crowing "], "question": "which entity is a person", "label": 0}, {"captions": ["a man speaks as music plays before artillery is fired", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vcmWSmvti8", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["music, man, fire", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": 
["a man is speaking music is playing and an explosion is heard ", "a woman is speaking and a baby is crying"], "question": "which entity is about a woman speaking and an animal grunting and snorting?", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "people speak as gunfire rings out"], "sample_ids": ["vfYTJq7nU", "wqTCwqVRDlk"], "start_seconds": ["130", "80"], "properties": ["rustling, ducks, quack", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a propeller rotates loudly and intensely"], "sample_ids": ["vhJWZheqaE", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["water drains unevenly, toilet flushes, water drains", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a toilet is flushed", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "a telephone rings followed by a woman talking"], "sample_ids": ["wIvYjuR3nrg", "tGcFnX0GHI"], "start_seconds": ["9", "0"], "properties": ["birds, pigeons, vocalize", "ring, talk, woman"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", null], "captions_pred_audio": ["birds are chirping and cooing", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "vehicles pass by on a roadway"], "sample_ids": ["tQWGZLItBXk", "tgbONvsP47Y"], 
"start_seconds": ["170", "0"], "properties": ["voice, music, whoosh", "pass, vehicle, roadway"], "captions_pred_video": ["worms revolution screenshots", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a car is driving on the road "], "question": "which entity is more active", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "waves crash against a shoreline and people speak"], "sample_ids": ["zcDwZ6W7E3E", "yFB25fqfU8I"], "start_seconds": ["180", "300"], "properties": ["man, speak, motorcycles", "wave, crash, shoreline"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which is a natural phenomenon", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a man speaks as a car is passing by"], "sample_ids": ["ylpYOorfH4o", "sK4u5T8hW78"], "start_seconds": ["410", "30"], "properties": ["engine, run, loud", "a, car, pass"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking with background noise and breathing sounds "], "question": "which entity is quieter", "label": 1}, {"captions": ["someone snores nearby", "a man speaks while a machine runs before a smoke alarm beeps"], "sample_ids": ["spJCm8tD9Zo", "sG7TyPnFDR0"], "start_seconds": ["90", "180"], "properties": ["someone snores, nearby, someone", "beeps, machine, smoke alarm"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a person is using an espresso machine in a restaurant"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a microwave oven is beeping "], "question": "which entity is about a machine?", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "an insect buzzes around continuously"], "sample_ids": ["x9JovgqUcs", "v25l1jef3JY"], "start_seconds": ["500", "0"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["an insect buzzes around continuously", "a child speaks in closed space"], "sample_ids": ["v25l1jef3JY", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["buzzes, continuously, insect", "child, space, speak"], "captions_pred_video": ["a black background with a cartoon character in the foreground", 
"of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is not a child?", "label": 0}, {"captions": ["food is frying while a woman speaks", "a vehicles accelerate quickly and someone laughs"], "sample_ids": ["yhQ2Lg-7qDY", "uWPRNLnpy7Y"], "start_seconds": ["130", "10"], "properties": ["food, woman, speak", "accelerate, laugh, vehicle"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "is taken from a car driving down the street"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "cats meow and then a person begins to talk while the cats continue to meow"], "sample_ids": ["vBHyYJ8pL0", "x5cuQjOdM3E"], "start_seconds": ["2", "30"], "properties": ["noise, door, opening", "cat, talk, meow"], "captions_pred_video": [null, "a black background with an airplane flying in the sky"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a cat meows and a woman speaks"], "question": "which entity is accompanied by a person talking", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["sAam2NqGhLY", "yDoT73BWsdA"], "start_seconds": ["20", "10"], "properties": ["snoring, breathing, child", "engine, revs, vehicle"], "captions_pred_video": ["of a little girl sleeping on a couch", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a person is snoring", "a race car accelerates and revs its engine "], "question": "which entity is not a person", "label": 1}, {"captions": ["men speak and a nozzle sprays 
liquid", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wRV8yMk886E", "vYkA3cfXp5Q"], "start_seconds": ["0", "30"], "properties": ["liquid, spray, nozzle", "engine, accelerate, idle"], "captions_pred_video": ["two cars are parked in a parking lot at night", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man speaks followed by a loud burst", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks as a machine runs", "people cheer as a vehicle engine revs"], "sample_ids": ["vD6lYD1l0BY", "xjhAnI2q6hM"], "start_seconds": ["330", "6"], "properties": ["a, machine, run", "engine revs, vehicle, people"], "captions_pred_video": ["game controller being held in the hands of the person", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "a stream of water runs briefly"], "sample_ids": ["ukxt9I7eMMg", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["food, pan, cook", "stream, water, run"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "water is sprayed across a hard surface"], "sample_ids": ["vBslzh7saPw", "sQwlkXjQabo"], "start_seconds": ["90", "10"], "properties": ["engine, roar, louder", "water, spray, surface"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a close-up of a red car with water droplets on the 
hood"], "captions_pred_audio": ["a jet engine roars and accelerates ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["ticking continues without interruption", "a man speaks and is typing on a keyboard"], "sample_ids": ["v-g-j2uTByM", "x9JovgqUcs"], "start_seconds": ["30", "500"], "properties": ["ticking, continuous, clock", "a, man, speaks, keyboard"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", null], "captions_pred_audio": ["a clock is ticking loudly", "a man speaks and types on a keyboard"], "question": "which entity is not continuous", "label": 1}, {"captions": ["water splashes as an animal walks through", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["w1ir-sZ3Im8", "vfYTJq7nU"], "start_seconds": ["90", "130"], "properties": ["animal, water, splashes", "rustling, ducks, quack"], "captions_pred_video": ["footage of a group of people riding horses through a river", null], "captions_pred_audio": ["water splashes and gurgles as people speak", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 0}, {"captions": ["a man makes an exclamation, then another man speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["xO-Q2BlIIPU", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["two men, exclamation, speak", "men, talk, cars"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "a weapon fires multiple times"], "sample_ids": ["x9JovgqUcs", "sMC07Ucy7kg"], "start_seconds": ["500", "10"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "weapon, fire, multiple"], "captions_pred_video": [null, "footage is from a car's point of view"], "captions_pred_audio": ["a man speaks and types on a keyboard", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["dogs barking and whimpering", "a propeller rotates loudly and intensely"], "sample_ids": ["tIY7qOV3rEM", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["barking, whimpering, dog", "loud, intense, propeller"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a small engine idles continuously"], "sample_ids": ["xBxDz0CFVn0", "y5WII6cTH7k"], "start_seconds": ["30", "40"], "properties": ["stream, water, flow", "engine, idle, continuously"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a sewing machine stitching a red and white hat"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "an engine is knocking and vibrating "], "question": 
"which entity is not a stream of water?", "label": 1}, {"captions": ["an airplane engine runs", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["yVPZ2MNWpms", "ukg5L09Wpvo"], "start_seconds": ["0", "150"], "properties": ["engine, airplane, runs", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a car is driving by on the road ", "a train blows its whistle and blows its horn "], "question": "which entity is a train", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "an airplane engine spools and people speak"], "sample_ids": ["vSeGhaZt-aI", "wTjoRj1se3U"], "start_seconds": ["50", "390"], "properties": ["water, bubbles, run", "airplane, engine, spool"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a jet engine is running and people are talking"], "question": "which entity is a video of a man speaking and water bubbles and runs?", "label": 0}, {"captions": ["paper is crumpling consistently", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["v5cSxLaHADY", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "a woman, laughs, animal"], "captions_pred_video": ["footage of the person holding a pair of scissors", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["paper is crumpled and crinkled", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a clock ticktocks in wind", "a saw finishes running as metal clings in the background"], 
"sample_ids": ["yVumC9TGknc", "zofjfKhqLk8"], "start_seconds": ["30", "10"], "properties": ["ticktocks, clock, wind", "background, metal, clings"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of a man using a machine to cut a piece of wood"], "captions_pred_audio": ["a series of beeps and chirps", "a large engine is running and a bell is ringing"], "question": "which entity is a clock", "label": 0}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a woman speaks happily and an animal chirps"], "sample_ids": ["sLUnaPT5gM8", "uWAAAL4CIoc"], "start_seconds": ["0", "0"], "properties": ["loud, laughter, intermittent", "a woman, chirps, animal"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "winds blows roughly as a vehicle races past"], "sample_ids": ["vVhthZ45k3Y", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["cat, purr, hiss", "wind, blows, vehicle"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a jet engine roars and wind blows "], "question": "which entity is more likely to be a windy day", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "water flows and trickles"], "sample_ids": ["wy1eKjR7KC0", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["people, talk, distance", "water, flow, trickle"], "captions_pred_video": ["two police officers riding motorcycles down the street", "the rocks on the beach are surrounded by water and the sky is visible in the background"], 
"captions_pred_audio": ["a man is speaking and a siren is going off", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["an airplane accelerates briefly", "water splashes as an animal walks through"], "sample_ids": ["zjTG0gaGCUI", "w1ir-sZ3Im8"], "start_seconds": ["80", "90"], "properties": ["accelerates, airplane, briefly", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a jet engine roars as wind blows ", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be a car", "label": 1}, {"captions": ["a toilet flushes and water drains", "a man talks while a clock does ticktock"], "sample_ids": ["sfAvvZwdLCY", "spYNpeN7rPY"], "start_seconds": ["20", "1"], "properties": ["water drains, flushes, water", "a clock, ticktock, man"], "captions_pred_video": ["footage of the toilet in the bathroom", "in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and breathing with background noise "], "question": "which entity is a clock?", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "people cheer as a vehicle engine revs"], "sample_ids": ["w-4gHptFNuU", "xjhAnI2q6hM"], "start_seconds": ["21", "6"], "properties": ["engine revs, accelerates, bump", "engine revs, vehicle, people"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["zkKdxzNC97Y", "y8WEcpOlT3I"], "start_seconds": ["27", "40"], "properties": ["loud, bang, noise", 
"harsh, wind, blows"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking with wind noise in the background "], "question": "which entity is softer", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wqADXCzngMw", "sSMl2vc3ek"], "start_seconds": ["340", "20"], "properties": ["engine, idle, man", "loud, multiple, distance"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", null], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a man speaks as a car is passing by"], "sample_ids": ["yajyRTUQk3U", "sK4u5T8hW78"], "start_seconds": ["400", "30"], "properties": ["a woman, something, fried", "a, car, pass"], "captions_pred_video": ["- a woman cooking in the kitchen", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "an animal quacks rapidly"], "sample_ids": ["vf9xf3vMsGM", "vh30P49Po6s"], "start_seconds": ["540", "30"], "properties": ["A man speaks while turning a 
water faucet on.", "animal, quacks, rapidly"], "captions_pred_video": ["of the person washing their hands under the faucet", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a duck is quacking loudly"], "question": "which entity is a person", "label": 0}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["xKB8O8LTs6s", "uYT5gxnyMWM"], "start_seconds": ["70", "50"], "properties": ["music, radio, gunshots", "female, spraying, scream"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking on a radio?", "label": 0}, {"captions": ["wind blows strongly and a young man speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vs65y4qmyBE", "uZesmtKZGSw"], "start_seconds": ["340", "250"], "properties": ["wind, blows, strongly", "men, talk, cars"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking and a 
car is revving with laughter in the background "], "question": "which entity is more likely to be in a city", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["sK4u5T8hW78", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["a, car, pass", "water, radio, man"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a clock ticktocks"], "sample_ids": ["sfAvvZwdLCY", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the toilet in the bathroom", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a toilet is flushed", "a clock is ticking loudly"], "question": "which entity is a timepiece", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "water splashes and a motorboat passes as people yell"], "sample_ids": ["vSeGhaZt-aI", "w5W5Kqtc8E"], "start_seconds": ["50", "100"], "properties": ["water, bubbles, run", "water, splashes, motorboat"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and 
pouring liquid with background noise ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["t25U-v4k4ts", "y8WEcpOlT3I"], "start_seconds": ["40", "40"], "properties": ["bees buzz, birds chirp, man speaks", "harsh, wind, blows"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a man is speaking with wind noise in the background "], "question": "which entity is more likely to be in a calm environment", "label": 0}, {"captions": ["a clock ticktocks", "a vehicle accelerates and squeals tires"], "sample_ids": ["v-g-j2uTByM", "yRx9txMcBl0"], "start_seconds": ["30", "40"], "properties": ["ticktocks, clock, ticktocks", "accelerates, tires, squeals"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a clock is ticking loudly", "a car is revving its engine and skidding "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["an engine runs and a man speaks", "a train horn sounds as a railroad passing bell rings"], "sample_ids": ["yT5WfYMRr-U", "zgUgkpk78xU"], "start_seconds": ["30", "70"], "properties": 
["engine, run, man", "horn, bell, train"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a train blows its horn as it speeds down the tracks "], "question": "which train is going to pass the other train?", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "a propeller rotates loudly and intensely"], "sample_ids": ["zj2R0XoFr5k", "ugHJF0hfYkg"], "start_seconds": ["50", "10"], "properties": ["airplane, fly, woman", "loud, intense, propeller"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "people speak as gunfire rings out"], "sample_ids": ["wyllXV6PjKo", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["a kid, talk, cry", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "birds chirp and objects are moved around"], "sample_ids": ["zl9Dqx-j7q4", "yPUYU6t3rwo"], "start_seconds": ["6", "370"], "properties": ["engine, laugh, loud", "birds chirp, objects are moved around, birds"], 
"captions_pred_video": ["footage of a man driving a car in the dark", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a jet engine roars ", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "paper folding and crinkling"], "sample_ids": ["t97k0cejSQE", "zPpG3RD8lSs"], "start_seconds": ["250", "20"], "properties": ["bird, chirp, insect", "paper, fold, crinkle"], "captions_pred_video": ["a bee on a purple thistle flower", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "the wind blows and a mouse clicks "], "question": "which entity is not a living thing", "label": 1}, {"captions": ["a baby cries and a woman speaks", "water flows and trickles"], "sample_ids": ["tMbMDvT50j8", "tB7hWb9gTuQ"], "start_seconds": ["12", "30"], "properties": ["a, cry, woman", "water, flow, trickle"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a 
baby cries and a woman speaks", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["y1saVTXsKwc", "w5W5Kqtc8E"], "start_seconds": ["80", "100"], "properties": ["a, dog, talk", "wind, blow, vehicle"], "captions_pred_video": ["a dog playing with a pink ball", null], "captions_pred_audio": ["a dog barks and a man speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vbZ-0lGPneg", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["a woman, a television program, a bird", "applause, audience, yells"], "captions_pred_video": ["of a man holding a baby duck in his hands", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a man is speaking and a crowd is clapping"], "question": "which entity has more people", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "water pouring and bubbling"], "sample_ids": ["xZepNM9qcRA", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["background, motor, run", "water, bubbles, pouring"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube 
how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "water is running from a faucet"], "question": "which entity is more active", "label": 1}, {"captions": ["a person snoring several times", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["spJCm8tD9Zo", "tdWhHV3X25Q"], "start_seconds": ["90", "60"], "properties": ["snore, person, several", "applause, audience, yells"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a woman and man are speaking", "small dogs yip and bark sharply"], "sample_ids": ["vbpKkWvfOu4", "v-wcQf4BDY0"], "start_seconds": ["560", "120"], "properties": ["two people, speaking, woman, man", "bark, yip, sharply"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a stream of water runs briefly", "some tunes played by whistling"], "sample_ids": ["x-PeY8Yb8M4", "u6BnG6YZqJ4"], "start_seconds": ["300", "0"], 
"properties": ["stream, water, run", "tune, play, whistling"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a car is driving on a wet road ", "a person whistling a song"], "question": "which entity is not a stream of water?", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "water rushes and then a vehicle zooms past"], "sample_ids": ["s4Uz1Ffgo04", "s4Uz1Ffgo04"], "start_seconds": ["100", "100"], "properties": ["water, rushes, motorcycle", "water, rushes, vehicle"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity shows a vehicle zooming past?", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "some men converse over an engine running"], "sample_ids": ["vzxHnu-SFEw", "sCiy7QS1U"], "start_seconds": ["80", "300"], "properties": ["two objects, woman, speak", "men, converse, engine"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how 
to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which object is being rubbed together", "label": 0}, {"captions": ["a man speaks while water drains", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vSeGhaZt-aI", "uZesmtKZGSw"], "start_seconds": ["50", "250"], "properties": ["water, drain, man", "men, talk, cars"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a clock ticktocks continuously", "a motorcycle engine works nearby"], "sample_ids": ["vlJS7LN2XyM", "tOSWIURC-4"], "start_seconds": ["30", "0"], "properties": ["ticktocks, clock, ticktocks continuously", "engine, work, nearby"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "a lawn mower 
is running "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a man talks as several small engines run", "ticking continues without interruption"], "sample_ids": ["u9A6VZQCZpU", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "ticking, continuous, clock"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vXlk0lIQBFo", "uYT5gxnyMWM"], "start_seconds": ["470", "50"], "properties": ["wind, speak, vocalize", "a, scream, girl"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a woman is speaking and a baby is crying"], "question": "which entity is about a girl speaking followed by a scream?", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "pigeons vocalize and birds chirp"], "sample_ids": ["sapQIQUhFc", "uiS58TNyUiw"], "start_seconds": ["280", "430"], "properties": ["liquid, flow, distance", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["waves crash and wind blows", "rain falls loudly as the build up of thunder and lightning occurs in the distance"], "sample_ids": ["vwqaIHKxLvM", "wulOEFdECWs"], "start_seconds": ["20", "10"], "properties": ["wind, crash, wave", "rain, thunder, lightning"], 
"captions_pred_video": ["of a surfer riding a big wave in the ocean", "a view of the city at night from a window in the rain"], "captions_pred_audio": ["waves crash and wind blows ", "a heavy rain is falling on a surface"], "question": "which entity is more likely to cause damage", "label": 1}, {"captions": ["a power tool runs and touches a surface", "water splashes as an animal walks through"], "sample_ids": ["zfvPRf3chY", "w1ir-sZ3Im8"], "start_seconds": ["290", "90"], "properties": ["power tool, run, touch", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "water splashes and gurgles as people speak"], "question": "which entity is not a power tool?", "label": 1}, {"captions": ["a mechanical buzzing getting louder", "small dogs yip and bark sharply"], "sample_ids": ["sEprKHm8Sj8", "v-wcQf4BDY0"], "start_seconds": ["90", "120"], "properties": ["noise, loud, buzzing", "bark, yip, sharply"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "people applaud and hoot and chat quietly"], "sample_ids": ["y2bVZ7rz-5M", "wwyfGO2J4"], "start_seconds": ["280", "90"], "properties": ["engine, horn, siren", "people, applaud, hoot"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren 
is blaring ", "people are clapping and speaking with background noise "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tDlfY3nmx1A", "zj2R0XoFr5k"], "start_seconds": ["160", "50"], "properties": ["applause, laugh, man", "airplane, boy, fly"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "wind blows as people chatter quietly"], "sample_ids": ["w6RTHR6AeAg", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["call, owl, screech", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a toilet flushes and water drains", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sfAvvZwdLCY", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "stream, water, flow"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage is blurry and out of focus"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking with wind noise in the background "], "question": "which entity is moving water", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a person snores loudly multiple times at a close distance"], "sample_ids": ["un9VQlzgZM", "sSMl2vc3ek"], 
"start_seconds": ["5", "20"], "properties": ["wind, speak, laugh", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["ukg5L09Wpvo", "y2bVZ7rz-5M"], "start_seconds": ["150", "280"], "properties": ["a train, a horn, a bell", "motor noise, horn, siren"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn and a bell?", "label": 0}, {"captions": ["a child and woman laughs and the woman speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["uPDn2BFTHk", "wz7N8YRy74I"], "start_seconds": ["140", "30"], "properties": ["woman, laughs, speaks", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in it?", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wRBHTgrbiwg", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["bird, owl, speak", "three men, wind, flow"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man 
is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking briefly?", "label": 0}, {"captions": ["birds coo incessantly", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["yZrFNS7GFBQ", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["coo, bird, incessant", "clickety-clack, train, whistle"], "captions_pred_video": ["of the bird in the cage", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["an owl hoots in the background ", "a train blows its whistle and blows its horn "], "question": "which is continuous", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "water splashes as an animal walks through"], "sample_ids": ["vzxHnu-SFEw", "w1ir-sZ3Im8"], "start_seconds": ["80", "90"], "properties": ["two objects, woman, speak", "animal, water, splashes"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman is speaking and breathing with 
mechanisms in the background ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "paper is repeatedly crumpled and crinkled"], "sample_ids": ["tK4VlLsNxak", "vms5XGTDVQc"], "start_seconds": ["120", "220"], "properties": ["a, dial, telephone", "paper, crumpled, crinkled"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage of a woman opening a black bag on a table"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "paper is crumpled and crinkled"], "question": "which is not a rotary telephone", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "water splashes as an animal walks through"], "sample_ids": ["sTpirNYo8vQ", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["a, tone, fast", "animal, water, splashes"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["frogs croak and vocalize", "some men converse over an engine running"], "sample_ids": ["yswmmRZFItk", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["croak, vocalize, frog", "men, converse, engine"], "captions_pred_video": ["a close up of a frog in the water", null], "captions_pred_audio": ["a frog is croaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a human activity", "label": 1}, {"captions": ["a person speaks over rustling leaves", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["zOZleIRqZm4", "tDVADusiIoc"], "start_seconds": ["80", "60"], "properties": ["rustling, leaves, person", "water, radio, man"], 
"captions_pred_video": ["a person picking berries from the bushes in the garden", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["speaking following by laughing and clapping", "wind blows and people scream while an engine revs"], "sample_ids": ["u2f5NpsoHBg", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["person, laugh, clap", "wind, engine, scream"], "captions_pred_video": ["is being projected on a screen at the front of the stage", null], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is more likely to be a movie", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["sjlVMgdGSK0", "t97k0cejSQE"], "start_seconds": ["30", "250"], "properties": ["car, revving, loudly", "sound, chirp, buzz"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "a bee on a purple thistle flower"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a bee buzzes and a woman speaks"], "question": "which entity is quieter", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "vehicles pass by on a roadway"], "sample_ids": ["sShpyu2l4YQ", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["growl, bark, yip", "pass, vehicle, roadway"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a dog is barking and growling", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["an engine 
sputters followed by a car zooming by", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["u5RmF3c3Aw", "zFjIWfSD-4"], "start_seconds": ["60", "410"], "properties": ["engine, car, zoom", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a car zooming by?", "label": 0}, {"captions": ["a clang followed by a toilet flushing", "pigeons vocalize and birds chirp"], "sample_ids": ["wNZ5thZM7XU", "uiS58TNyUiw"], "start_seconds": ["20", "430"], "properties": ["sound, flush, toilet", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a toilet in a bathroom stall", "of the pigeon in the cage"], "captions_pred_audio": ["a toilet flushes", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "a clock ticktocks"], "sample_ids": ["vfYTJq7nU", "v-g-j2uTByM"], "start_seconds": ["130", "30"], "properties": ["ducks, quack, man", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a dog barks and whimpers", "wind blows and people scream while an engine revs"], "sample_ids": ["sShpyu2l4YQ", "w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["barks, whimpers, dog", "wind, engine, scream"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is more quiet", "label": 0}, {"captions": ["metal clacking as food and oil sizzles followed by a woman 
talking", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vW4x7S1VfQc", "tiDFTC-5vU"], "start_seconds": ["150", "30"], "properties": ["clacking, oil, woman", "male, duck, laugh"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", null], "captions_pred_audio": ["food sizzles in a frying pan", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "some tunes played by whistling"], "sample_ids": ["vXlk0lIQBFo", "u6BnG6YZqJ4"], "start_seconds": ["470", "0"], "properties": ["wind, speak, vocalize", "tune, play, whistling"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["tOSWIURC-4", "ukg5L09Wpvo"], "start_seconds": ["0", "150"], "properties": ["engine, work, nearby", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a lawn mower is running ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "some men converse over an engine running"], "sample_ids": ["u--KhUW8l1Y", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["engine, sound, horn", "men, converse, engine"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", null], "captions_pred_audio": ["a fire truck siren blares and a horn blows 
", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a running engine", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a young woman speaks over spraying and another person yells"], "sample_ids": ["wy1eKjR7KC0", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["people, talk, distance", "person, spray, yell"], "captions_pred_video": ["two police officers riding motorcycles down the street", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "a stream of water runs briefly"], "sample_ids": ["tiDFTC-5vU", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["male, duck, laugh", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["weDbePuc-Xc", "zj2R0XoFr5k"], "start_seconds": ["40", "50"], "properties": ["music, slaps, human", "airplane, boy, fly"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a woman sneezes then 
speaks", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["x4dZyf9Gbj0", "y8WEcpOlT3I"], "start_seconds": ["130", "40"], "properties": ["sneezes, speaks, woman", "harsh, wind, blows"], "captions_pred_video": ["footage is blurry and out of focus", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking with wind noise in the background "], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "plastic is tapped on while someone speaks"], "sample_ids": ["y8dSeubCNI", "wvKpEYswXO0"], "start_seconds": ["4", "150"], "properties": ["men, women, car", "plastic, tap, speak"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["an engine revving and people talking in the background", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is quieter", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a baby laughs giddily and a woman laughs then speaks"], "sample_ids": ["y8dSeubCNI", "wjsXBsc7M40"], "start_seconds": ["4", "10"], "properties": ["men, women, car", "a baby laughs, a woman laughs, a woman speaks"], "captions_pred_video": [null, "footage of the baby playing with a toothbrush"], "captions_pred_audio": ["an engine revving and people talking in the background", "a baby laughs and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "people cheer as a vehicle engine revs"], "sample_ids": ["wDVMhEdTiVw", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["gun, shoot, water", "engine revs, vehicle, people"], "captions_pred_video": ["a blurry image of trees and water in the forest", "a school bus decorated with christmas lights is 
floating in the water"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a man speaks as crickets sing"], "sample_ids": ["tQWGZLItBXk", "ryFDPxgDOGc"], "start_seconds": ["170", "570"], "properties": ["music, kid, speak", "a, crickets, sing"], "captions_pred_video": ["worms revolution screenshots", "a group of people dressed in camouflage and hunting gear in the dark"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking with crickets chirping in the background"], "question": "which entity has a man speaking as crickets sing?", "label": 1}, {"captions": ["a car accelerates and wind blows", "a motor runs steadily as a man speaks, then the motor revs twice"], "sample_ids": ["u0TrcHhkPQ", "ylpYOorfH4o"], "start_seconds": ["20", "410"], "properties": ["accelerates, wind, blows", "motor, run, steady"], "captions_pred_video": [null, "for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and an engine is revving"], "question": "which is a moving object", "label": 0}, {"captions": ["a railroad crossing bell rings as a train horn blows", "a bus 
engine idles while a woman speaks making an announcement"], "sample_ids": ["tZGN5a7ybxo", "su6FAOcOA8c"], "start_seconds": ["60", "4"], "properties": ["ring, train, horn", "engine, idle, woman"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wDVMhEdTiVw", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["gun, shoot, water", "engine, idle, woman"], "captions_pred_video": ["a blurry image of trees and water in the forest", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["yYJksgsxx5U", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["audio, woman, silverware", "A, game, keyboard"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the 
legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a man speaks and types on a computer keyboard "], "question": "which entity is a video?", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a flush is followed by gurgling water, then another flush"], "sample_ids": ["vfYTJq7nU", "tqR406bGiE"], "start_seconds": ["130", "40"], "properties": ["rustling, ducks, quack", "flush, water, gurgle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a duck quacks and a woman speaks", "a toilet is flushed"], "question": "which entity is more likely to be a video of a toilet flushing?", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "someone is typing on a computer keyboard"], "sample_ids": ["zofjfKhqLk8", "v0x1odnXtP0"], "start_seconds": ["10", "210"], "properties": ["noise, stop, motor", "keyboard, type, computer"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "how to make money on youtube in spanish"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "water splashes and a door squeaks"], "sample_ids": ["sxYkFKFIZD0", "sdXV-ylviw"], "start_seconds": ["20", "190"], "properties": ["screech, man, door", "sound, splash, door"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 
2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a dog barks and taps with background noise "], "question": "which door is squeaking", "label": 1}, {"captions": ["a person is whistling a tune", "a infant makes noise and is excited"], "sample_ids": ["scYRUkrFLiQ", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["a, tune, whistle", "noise, excited, infant"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a person whistling a song", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "winds blows roughly as a vehicle races past"], "sample_ids": ["w0xsN8X18Y", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["music, surface, rain", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be a storm", "label": 1}, {"captions": ["an insect buzzes around continuously", "someone is typing on a computer keyboard"], "sample_ids": ["v25l1jef3JY", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["buzzes, continuously, insect", "keyboard, type, computer"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "how to make money on youtube in spanish"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a person is typing on a keyboard"], "question": 
"which entity is not a person", "label": 0}, {"captions": ["a woman speaks happily and an animal chirps", "multiple people speak and children yell while water gurgles"], "sample_ids": ["uWAAAL4CIoc", "vb1fPSDI4c"], "start_seconds": ["0", "30"], "properties": ["a woman, chirps, animal", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a machine runs continuously", "water pouring and bubbling"], "sample_ids": ["wdXV3Pv0jiY", "uyRfq-jKPpo"], "start_seconds": ["11", "50"], "properties": ["machine, running, continuously", "water, bubbles, pouring"], "captions_pred_video": ["footage is blurry and shaky", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "water is running from a faucet"], "question": "which entity is not running continuously", "label": 1}, {"captions": ["scraping and female speech with distant music", "a infant makes noise and is excited"], "sample_ids": ["yHeVV-xeOxQ", "wIJK3-5y0kA"], "start_seconds": ["130", "30"], "properties": ["female, speech, music", "noise, excited, infant"], 
"captions_pred_video": ["of a girl milking a goat's udder", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a crowd yells, reacts and applauds"], "sample_ids": ["zY3icUyMdh8", "wztCSUxOf8"], "start_seconds": ["20", "130"], "properties": ["dog, bark, engine", "a crowd, yells, applauds"], "captions_pred_video": ["footage of a bus driving through a residential street at night", null], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking and a crowd is clapping"], "question": "which entity is more likely to be at a sporting event", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wRV8yMk886E", "xfaoyyzw2WU"], "start_seconds": ["0", "180"], "properties": ["liquid, spray, nozzle", "loud, jet engine, roar"], "captions_pred_video": ["two cars are parked in a parking lot at night", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man speaks followed by a loud burst", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "birds chirp quietly and an adult man speaks"], "sample_ids": ["y8dSeubCNI", "zuua6-5goWw"], "start_seconds": ["4", "30"], "properties": ["men, women, car", "birds, chirp, quiet, man, speaks"], "captions_pred_video": [null, "in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 
10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot"], "captions_pred_audio": ["an engine revving and people talking in the background", "birds are chirping and a man is speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["tDVADusiIoc", "ziUT9IFTkjg"], "start_seconds": ["60", "10"], "properties": ["water, radio, man", "background, birds, rustling"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["xl2PIWyXaM", "vJ7JPEFhyLA"], "start_seconds": ["160", "16"], "properties": ["chirp, man, younger person", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds are chirping and people are talking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a man talks as several small engines run", "music plays, a person speaks, followed by whooshes and a ding"], "sample_ids": ["u9A6VZQCZpU", "tQWGZLItBXk"], "start_seconds": ["30", "170"], "properties": ["a, man, talk", "music, person, ding"], "captions_pred_video": [null, "worms revolution screenshots"], 
"captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity has a person speaking?", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a man speaks as a car is passing by"], "sample_ids": ["sa6TLVbooCc", "sK4u5T8hW78"], "start_seconds": ["240", "30"], "properties": ["people, laugh, child", "a, car, pass"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["tQWGZLItBXk", "wDVMhEdTiVw"], "start_seconds": ["170", "30"], "properties": ["music, person, ding", "gun, shoot, water"], "captions_pred_video": ["worms revolution screenshots", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a game", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "white noise and snoring with some rustling in the background"], "sample_ids": 
["x5cuQjOdM3E", "xzKKf9bKNUo"], "start_seconds": ["30", "10"], "properties": ["cat, talk, meow", "background, noise, snoring"], "captions_pred_video": ["a black background with an airplane flying in the sky", "shows a woman laying on a bed with her eyes closed and her mouth open"], "captions_pred_audio": ["a cat meows and a woman speaks", "a person snoring loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a man speaks as a car is passing by"], "sample_ids": ["sK4u5T8hW78", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a, car, pass", "a, car, pass"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a photograph", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "a car accelerates and wind blows"], "sample_ids": ["vs65y4qmyBE", "u0TrcHhkPQ"], "start_seconds": ["340", "20"], "properties": ["wind, blows, strongly", "accelerates, wind, blows"], "captions_pred_video": ["a car is 
engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "an adult male speaks and dials a rotary phone"], "sample_ids": ["tDVADusiIoc", "tK4VlLsNxak"], "start_seconds": ["60", "120"], "properties": ["man, radio, blows", "An adult male speaks, dials, and speaks into a rotary phone"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and using a sewing machine"], "question": "which entity is a man?", "label": 0}, {"captions": ["a young woman speaks over spraying and another person yells", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["uYT5gxnyMWM", "tDVADusiIoc"], "start_seconds": ["50", "60"], "properties": ["person, spray, yell", "water, radio, man"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a person speaking over spraying?", "label": 0}, {"captions": ["females talk and laugh over gusting wind", "a man talks as something metal hits against and glass is set down"], "sample_ids": ["un9VQlzgZM", "x6ijhqRY38s"], "start_seconds": ["5", "250"], "properties": ["females, talk, laugh", "something metal, glass, hit"], "captions_pred_video": [null, "a chef preparing a dish with a bottle of wine and a plate of food on a table"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a man 
is speaking and dishes are clanging "], "question": "which entity is about a man talking?", "label": 1}, {"captions": ["the revving of an engine throttle followed by a man speaking", "small dogs yip and bark sharply"], "sample_ids": ["tezvROoo4bs", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["audio, throttle, speaking", "bark, yip, sharply"], "captions_pred_video": ["footage of a busy city street with cars parked on both sides of the road", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["xyL9F5VrjkE", "w5W5Kqtc8E"], "start_seconds": ["20", "100"], "properties": ["engine, run, wind", "wind, blow, vehicle"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a motorboat is moving and people are shouting and cheering "], "question": "which vehicle is running an engine?", "label": 0}, {"captions": ["multiple ducks quack continuously", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wfHeoPDLMaM", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["multiple, quack, continuously", "airplane, boy, fly"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn 
with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["ducks are quacking", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["wind blows strongly", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["w8uLijTqtlU", "zj2R0XoFr5k"], "start_seconds": ["70", "50"], "properties": ["wind, blows, strongly", "airplane, boy, fly"], "captions_pred_video": ["footage is blurry and shaky", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["the wind is blowing strongly", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "birds chirp and objects are moved around"], "sample_ids": ["vZAw4apG0Es", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["people, clock, converse", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a clock is ticking and people are talking", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "water flows as men speak and yell"], "sample_ids": ["zsLxS-uLJTw", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["horn, blast, 
train", "water, flow, men"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a train passing?", "label": 0}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wyllXV6PjKo", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["a baby, a woman, a man", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a helicopter engine idles continuously", "wind blows as people chatter quietly"], "sample_ids": ["ugHJF0hfYkg", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["engine, idle, continuously", "wind, chatter, people"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage is blurry and out of focus"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vzxHnu-SFEw", "sSMl2vc3ek"], "start_seconds": ["80", "20"], "properties": ["two objects, woman, speak", "loud, multiple, distance"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how 
to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "a man speaks as a motor runs in the background"], "sample_ids": ["y2ZBGpgbhHM", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["dog, chirp, breathe", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["birds chirping and a dog panting", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "two men speak as a buffeting wind blows"], "sample_ids": ["vSeGhaZt-aI", "y8WEcpOlT3I"], "start_seconds": ["50", "40"], "properties": ["water, bubbles, speak", "wind, speak, buffeting"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is more likely to be in a calm environment", "label": 0}, {"captions": ["people clap 
and speak in the distance", "a person is snoring while sleeping"], "sample_ids": ["wwyfGO2J4", "vJrjSeP17yE"], "start_seconds": ["90", "40"], "properties": ["clap, distance, speak", "a person is sleeping, snoring, person"], "captions_pred_video": [null, "a black background with a small plane flying in the sky"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a person snoring loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wudZTNBtVqc", "uYT5gxnyMWM"], "start_seconds": ["60", "50"], "properties": ["accelerates, engine, wind", "female, spraying, scream"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a horn honks and then loudly blares", "a infant makes noise and is excited"], "sample_ids": ["wnpJndXuxLc", "wIJK3-5y0kA"], "start_seconds": ["50", "30"], "properties": ["horn, honk, loud", "noise, excited, infant"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "multiple people speak and children yell while water gurgles"], "sample_ids": ["w34HjHr6gAY", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["beeps, squawk, child speaking", "multiple, people, yell"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 
the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["wvKpEYswXO0", "y2bVZ7rz-5M"], "start_seconds": ["150", "280"], "properties": ["sound, water, running", "motor noise, horn, siren"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn honking?", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a car accelerates and wind blows"], "sample_ids": ["wqZ135Ssz0", "u0TrcHhkPQ"], "start_seconds": ["60", "20"], "properties": ["man, woman, squawks", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "water splashes as an animal walks through"], "sample_ids": ["sHbXC6na9hg", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["a person, saw, wood", 
"animal, water, splashes"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["an engine is idling and vibrating", "water splashes and gurgles as people speak"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "a man speaks as a car is passing by"], "sample_ids": ["sjlVMgdGSK0", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["car, revving, loudly", "a, car, pass"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which car is revving loudly", "label": 0}, {"captions": ["various birds chirp and squeal, and an animal grunts", "wind blowing followed by a zoom"], "sample_ids": ["tDlysoZiA1I", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["animal, grunts, chirps", "wind, blow, zoom"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is silent", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a man speaks as a car is passing by"], "sample_ids": 
["sEprKHm8Sj8", "sK4u5T8hW78"], "start_seconds": ["90", "30"], "properties": ["car, tires, slows", "a, car, pass"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which car is going faster", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["uOpoD0gGXcs", "zj2R0XoFr5k"], "start_seconds": ["120", "50"], "properties": ["chirps, woman, bird", "airplane, boy, fly"], "captions_pred_video": ["a herd of cows grazing in the field", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "wind blowing followed by a zoom"], "sample_ids": ["ylpYOorfH4o", "vr8ZXjEBhMQ"], "start_seconds": ["410", "150"], "properties": ["engine, running, wind", "wind, blow, zoom"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the 
fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and an engine is revving", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom?", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a stream runs then someone speaks"], "sample_ids": ["x5cuQjOdM3E", "wbHTKEJZyhc"], "start_seconds": ["30", "20"], "properties": ["cat, meows, young woman", "stream, run, someone"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage 
of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["a cat meows and a woman speaks", "a waterfall is flowing and people are speaking "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["s4Uz1Ffgo04", "su6FAOcOA8c"], "start_seconds": ["100", "4"], "properties": ["roars, background, people speaking", "engine, idle, woman"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a woman is speaking and a subway train is moving "], "question": "which entity is quieter", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a man speaks as a car is passing by"], "sample_ids": ["yYEVLuqEytU", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["animal, pig, background", "a, car, pass"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 
1}, {"captions": ["women speak and laugh as wind blows", "a clock ticktocks"], "sample_ids": ["un9VQlzgZM", "v-g-j2uTByM"], "start_seconds": ["5", "30"], "properties": ["wind, speak, laugh", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wP8ZKrlx3oA", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["rain, storm, thunder", "rustling, ducks, quack"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", null], "captions_pred_audio": ["a heavy rain is falling on a surface", "a duck quacks and a woman speaks"], "question": "which entity is more likely to be in a lake", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "crowd applause while a guy laughs followed by another man speaking"], "sample_ids": ["uiItxDsDMFI", "tDlfY3nmx1A"], "start_seconds": ["30", "160"], "properties": ["wood, piece, saw", "applause, laugh, man"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "a man in a suit and tie is talking to another man in a suit and tie"], "captions_pred_audio": ["a saw is being used with background noise ", "a crowd is clapping and laughing and a man is speaking "], "question": "which entity is a man speaking to a crowd?", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "an airplane engine runs"], "sample_ids": ["x9JovgqUcs", "yVPZ2MNWpms"], "start_seconds": ["500", "0"], "properties": ["An adult is 
speaking, typing, and using a computer keyboard", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a car is driving by on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a machine beeps continuously"], "sample_ids": ["yRx9txMcBl0", "y682ml90jGw"], "start_seconds": ["40", "11"], "properties": ["accelerates, tires, squeals", "beeps, machine, continuously"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["a toilet flushes and water drains", "two women and a man talk while a kid cries"], "sample_ids": ["sfAvvZwdLCY", "wyllXV6PjKo"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "a kid, talk, cry"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a woman speaks and a baby cries"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "winds blows roughly as a vehicle races past"], "sample_ids": ["tDVADusiIoc", "xjvTpk2Zpr8"], "start_seconds": ["60", "70"], "properties": ["man, radio, blows", "wind, 
blows, vehicle"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a jet engine roars and wind blows "], "question": "which entity shows a vehicle racing past?", "label": 1}, {"captions": ["some people speak", "vehicles pass by on a roadway"], "sample_ids": ["vbZ-0lGPneg", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "pass, vehicle, roadway"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a car is driving on the road "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["multiple ducks quack continuously", "pigeons vocalize and birds chirp"], "sample_ids": ["wfHeoPDLMaM", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["multiple, quack, continuously", "vocalize, bird, chirp"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a 
barn with a fire", "of the pigeon in the cage"], "captions_pred_audio": ["ducks are quacking", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sWZzXuWYY", "wz7N8YRy74I"], "start_seconds": ["420", "30"], "properties": ["male, clanks, thumps", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["xKB8O8LTs6s", "ziUT9IFTkjg"], "start_seconds": ["70", "10"], "properties": ["music, gunfire, explosion", "background, birds, rustling"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "birds are chirping and a chime is ringing "], "question": "which entity is more peaceful", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["zcDwZ6W7E3E", "wwyfGO2J4"], "start_seconds": ["180", "90"], "properties": ["man, speak, motorcycles", "people, applaud, hoot"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as he moves silverware in 
a bowl", "birds chirp and objects are moved around"], "sample_ids": ["x6ijhqRY38s", "yPUYU6t3rwo"], "start_seconds": ["250", "370"], "properties": ["bowl, silverware, man", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "insects buzz and a man speaks"], "question": "which entity is about moving objects around", "label": 1}, {"captions": ["a large crowd cheers and applauds", "an infant crying frantically"], "sample_ids": ["rqfQRErjfk8", "zwOBqeFTgiU"], "start_seconds": ["170", "30"], "properties": ["crowd, cheers, applauds", "cry, infant, frantically"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "of the baby crying in the car seat"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a cat meows and children speak", "a child speaks in closed space"], "sample_ids": ["x5cuQjOdM3E", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["cat, speak, children", "child, space, speak"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "people applaud and hoot and chat quietly"], "sample_ids": ["yRx9txMcBl0", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["accelerates, tires, squeals", "people, applaud, hoot"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand 
theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "people are clapping and speaking with background noise "], "question": "which entity is a performance", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["ugHJF0hfYkg", "yajyRTUQk3U"], "start_seconds": ["10", "400"], "properties": ["engine, running, continuously", "a woman, something, fried"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of something being fried?", "label": 1}, {"captions": ["a woman sneezes then speaks", "a frog croaks as other frogs croak in the background"], "sample_ids": ["x4dZyf9Gbj0", "yswmmRZFItk"], "start_seconds": ["130", "0"], "properties": ["sneezes, speaks, woman", "background, frog, croak"], "captions_pred_video": ["footage is blurry and out of focus", "a close up of a frog in the water"], "captions_pred_audio": ["a woman sneezes and speaks", "a frog is croaking"], "question": "which entity is a croaking animal?", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "some tunes played by whistling"], "sample_ids": ["sYITalLZjj4", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["water, rushes, background, birds", "tune, play, 
whistling"], "captions_pred_video": ["two ducks are swimming in the water near each other", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["wind blows and birds chirp", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["uiItxDsDMFI", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["wood, piece, saw", "engine, revs, vehicle"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a saw is being used with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["tQWGZLItBXk", "vbZ-0lGPneg"], "start_seconds": ["170", "30"], "properties": ["voice, music, whoosh", "a woman, a television program, a bird"], "captions_pred_video": ["worms revolution screenshots", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["a woman sneezes then speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["x4dZyf9Gbj0", "vzxHnu-SFEw"], "start_seconds": ["130", "80"], "properties": ["sneezes, speaks, woman", "two objects, woman, speak"], "captions_pred_video": ["footage is blurry and out of focus", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper 
cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman sneezes and speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which woman speaks as she rubs two objects together?", "label": 1}, {"captions": ["a helicopter engine runs", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["t5ZbXbniOWk", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["engine, helicopter, run", "engine, idle, woman"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a subway train is moving "], "question": "which entity has a running engine", "label": 0}, {"captions": ["a person sniffles and sneezes", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["uRlbY6aoBU", "vfYTJq7nU"], "start_seconds": ["0", "130"], "properties": ["sneezes, sniffles, person", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is sneezing ", "a duck quacks and a woman speaks"], "question": 
"which entity is about animals?", "label": 1}, {"captions": ["a motorcycle engine is idling", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vZAqdHZ81yA", "wDVMhEdTiVw"], "start_seconds": ["180", "30"], "properties": ["engine, motorcycle, idling", "gun, shoot, water"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["an engine is idling loudly", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a cat meows and children speak", "an insect buzzes around continuously"], "sample_ids": ["x5cuQjOdM3E", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["cat, speak, children", "buzzes, continuously, insect"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a cat meows and a woman speaks", "a fly is buzzing around a microphone "], "question": "which entity is more active", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "people cheer as a vehicle engine revs"], "sample_ids": ["yYJksgsxx5U", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["audio, woman, silverware", "engine revs, vehicle, people"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["w0xsN8X18Y", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], 
"properties": ["rain, thunder, surface", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not a gun?", "label": 0}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a woman speaks and dog vocalizes"], "sample_ids": ["zj2R0XoFr5k", "uWAAAL4CIoc"], "start_seconds": ["50", "0"], "properties": ["airplane, boy, fly", "a, dog, vocalize"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", null], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking and a dog is barking "], "question": "which entity is a dog?", "label": 1}, {"captions": ["a low rumbling in the distance followed by a motorcycle engine revving up", "wind blows as people chatter quietly"], "sample_ids": ["vr8ZXjEBhMQ", "xBxDz0CFVn0"], "start_seconds": ["150", "30"], "properties": ["sound, distance, engine", "wind, chatter, people"], "captions_pred_video": ["is taken from a motorcycle's point of view", "footage is blurry and out of focus"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a man speaks as a car is passing by"], "sample_ids": ["zY3icUyMdh8", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "a, car, pass"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more passive", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a child speaks in closed space"], "sample_ids": ["xSKJGCItUWE", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["engine, run, boy", "child, space, speak"], "captions_pred_video": ["footage of the helicopter flying in the room", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vlJS7LN2XyM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["background, clocks, ticking", "stream, water, flow"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage is blurry and out of focus"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "a helicopter engine idles continuously"], "sample_ids": ["vs65y4qmyBE", "ugHJF0hfYkg"], "start_seconds": ["340", "10"], "properties": ["engine, run, man", "engine, idle, continuously"], "captions_pred_video": ["a car is engulfed in flames on the side 
of the road", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a helicopter is flying overhead "], "question": "which entity has a running engine", "label": 0}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a man speaks as a vehicle engine idles"], "sample_ids": ["yLy-WycbVVE", "shmR4OZtzqA"], "start_seconds": ["30", "30"], "properties": ["background, people, talk", "man, engine, idle"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a man speaks while a motor runs"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person is burping while a girl speaks", "a stream of water runs briefly"], "sample_ids": ["vdoxuJn9lTc", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["person, burp, girl", "stream, water, run"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a child speaks followed by a burp", "a car is driving on a wet road "], "question": 
"which entity is a stream of water?", "label": 1}, {"captions": ["a beep occurs briefly", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["xtWeJ56-U-g", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["beep, occur, briefly", "a, scream, girl"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["birds chirp then an animal grunts", "people speak as gunfire rings out"], "sample_ids": ["tDlysoZiA1I", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["animal, grunt, chirp", "gunfire, ring, speak"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a woman speaks happily and an animal chirps"], "sample_ids": ["zcDwZ6W7E3E", "uWAAAL4CIoc"], "start_seconds": ["180", "0"], "properties": ["man, speak, motorcycles", "a woman, chirps, animal"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a woman is speaking and a dog is barking "], "question": "which entity has a more active animal", "label": 1}, {"captions": ["young female child snoring and 
breathing deeply", "a train horn blows as it passes by"], "sample_ids": ["sAam2NqGhLY", "zVacuqSb4LI"], "start_seconds": ["20", "30"], "properties": ["snoring, breathing, child", "horn, blows, train"], "captions_pred_video": ["of a little girl sleeping on a couch", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a person is snoring", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is not a person", "label": 1}, {"captions": ["food is frying then a woman speaks", "a stream of water runs briefly"], "sample_ids": ["ukxt9I7eMMg", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["food, woman, speak", "stream, water, run"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["water is sprayed across a hard surface", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sQwlkXjQabo", "sLUnaPT5gM8"], "start_seconds": ["10", "0"], "properties": ["water, spray, surface", "loud, laughter, 
intermittent"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["spraying followed by silence", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "paper is crumpling consistently"], "sample_ids": ["wDVMhEdTiVw", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["gun, shoot, water", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a blurry image of trees and water in the forest", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a propeller moves loudly nearby", "motors runs briefly and tires screech"], "sample_ids": ["ugHJF0hfYkg", "yRx9txMcBl0"], "start_seconds": ["10", "40"], "properties": ["loud, propeller, move", "motors, tires, screech"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car is revving its engine and skidding "], "question": "which is louder", "label": 1}, {"captions": ["birds chirp, a woman speaks, 
and insects buzz", "paper is crumpling consistently"], "sample_ids": ["t97k0cejSQE", "v5cSxLaHADY"], "start_seconds": ["250", "0"], "properties": ["sound, chirp, buzz", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a bee on a purple thistle flower", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "wind blows as people chatter quietly"], "sample_ids": ["tDVADusiIoc", "xBxDz0CFVn0"], "start_seconds": ["60", "30"], "properties": ["wind, radio, waves", "wind, chatter, people"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["an engine runs and wind blows", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vs65y4qmyBE", "vfYTJq7nU"], "start_seconds": ["340", "130"], "properties": ["engine, run, wind", "rustling, ducks, quack"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["someone whistles a song", "gunshots ring out, a man yells, and more shots follow"], "sample_ids": ["sIXTftIuUgw", "vKrYfzleLB8"], "start_seconds": ["90", "110"], "properties": ["someone, song, whistle", "a, ring, gunshots"], "captions_pred_video": [null, "stock footage of a person holding a gun in their 
hand"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with background noise and a cap gun is fired "], "question": "which entity is more quiet", "label": 0}, {"captions": ["sawing of wood and rustling with leaves blowing in the distance", "a toilet flushes and water drains"], "sample_ids": ["uiItxDsDMFI", "sfAvvZwdLCY"], "start_seconds": ["30", "20"], "properties": ["sound, distance, leaves", "water drains, flushes, water"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a saw is being used with background noise ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["goats bleat and metal clings", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tH17JPjDPnc", "uZesmtKZGSw"], "start_seconds": ["260", "250"], "properties": ["bleat, metal, clings", "men, talk, cars"], "captions_pred_video": ["feed of the goats eating hay in the barn", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["water flows and trickles", "a woman talking as an infant is crying"], "sample_ids": ["tB7hWb9gTuQ", "tMbMDvT50j8"], "start_seconds": ["30", "12"], "properties": ["water, flow, trickle", "a, 
talk, infant"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["water is splashing and gurgling", "a baby cries and a woman speaks"], "question": "which entity is a human activity", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["sapQIQUhFc", "uEU-Hg5MTN8"], "start_seconds": ["280", "27"], "properties": ["water, stream, trickles", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "a person snores loudly multiple times at a close distance"], "sample_ids": ["smGI3C1NZc", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["water, drain, toilet", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is flushed", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a child speaks in closed space"], "sample_ids": ["weDbePuc-Xc", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["cartoon character, music, vocalize", "child, space, speak"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which 
entity is speaking", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a duck quacks loudly and continuously"], "sample_ids": ["vdoxuJn9lTc", "vh30P49Po6s"], "start_seconds": ["40", "30"], "properties": ["burp, loud, girl", "loud, continuous, quacks"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a child speaks followed by a burp", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "an airplane engine roars increasingly louder"], "sample_ids": ["wjsXBsc7M40", "vBslzh7saPw"], "start_seconds": ["10", "90"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "engine, roar, louder"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a jet engine roars and accelerates "], "question": "which entity is louder", "label": 1}, {"captions": ["a child speaks in closed space", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yW6FWLSLkx4", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["child, space, speak", "three men, wind, flow"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be in a closed space", "label": 0}, {"captions": ["a man rubs two objects together then speaks", "birds chirp and objects are moved around"], "sample_ids": ["vveS8HT7Uog", "yPUYU6t3rwo"], "start_seconds": ["100", "370"], 
"properties": ["a man, objects, speak", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "insects buzz and a man speaks"], "question": "which entity is about birds?", "label": 1}, {"captions": ["paper folding and crinkling", "a infant makes noise and is excited"], "sample_ids": ["zPpG3RD8lSs", "wIJK3-5y0kA"], "start_seconds": ["20", "30"], "properties": ["paper, fold, crinkle", "noise, excited, infant"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a machine runs continuously", "a steam engine runs and whistles as it passes by"], "sample_ids": ["wdXV3Pv0jiY", "se87d6yxEOA"], "start_seconds": ["11", "10"], "properties": ["machine, running, continuously", "run, whistle, pass"], 
"captions_pred_video": ["footage is blurry and shaky", "footage of a train passing by a train station with smoke billowing out of the train's smokestack"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a train is moving and blowing its whistle "], "question": "which machine is running continuously", "label": 0}, {"captions": ["an animal growls followed by birds chirping", "small dogs yip and bark sharply"], "sample_ids": ["y2ZBGpgbhHM", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["animal, growl, bird", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["birds chirping and a dog panting", "a dog barks and growls"], "question": "which animal is more likely to be a dog", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sQGXqGcwOTc", "zl9Dqx-j7q4"], "start_seconds": ["3", "6"], "properties": ["audio, kid, giggles", "engine, laugh, loud"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of a man driving a car in the dark"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "a duck quacks continuously"], "sample_ids": ["w1mlz3Pe4fU", "vh30P49Po6s"], "start_seconds": ["300", "30"], "properties": ["vocalize, chirp, continuously", "quacks, continuously, duck"], "captions_pred_video": ["of a bird in a cage", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds are chirping and singing", "a duck is quacking loudly"], "question": "which entity is a duck?", "label": 1}, {"captions": ["an engine runs and a man speaks", "a horn rings out as 
a machine runs by"], "sample_ids": ["yT5WfYMRr-U", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["engine, run, man", "a, horn, run"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a person snoring", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["t8tv5YRMJUg", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["a person, snore, loud", "animal, grunts, snorts"], "captions_pred_video": ["of a man getting his face licked by another man", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a woman is speaking and a baby is crying"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "water is sprayed across a hard surface"], "sample_ids": ["wudZTNBtVqc", "sQwlkXjQabo"], "start_seconds": ["60", "10"], "properties": ["accelerates, engine, wind", "water, spray, surface"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a car accelerates and revs its engine ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "water splashes as an animal walks through"], "sample_ids": ["zofjfKhqLk8", "w1ir-sZ3Im8"], "start_seconds": ["10", "90"], "properties": ["noise, stop, motor", "animal, water, splashes"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a 
large engine is running and a bell is ringing", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a man talks while vehicles pass by", "a clock ticktocks"], "sample_ids": ["sK4u5T8hW78", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "ticktocks, clock, ticktocks"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["a woman talking as an infant is crying", "an airplane engine spools and people speak"], "sample_ids": ["tMbMDvT50j8", "wTjoRj1se3U"], "start_seconds": ["12", "390"], "properties": ["a, talk, infant", "airplane, engine, spool"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a baby cries and a woman speaks", "a jet engine is running and people are talking"], "question": "which entity is a video of a person talking to an infant?", "label": 0}, {"captions": ["water gurgles, metal squeaks and the water stops", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["x4a9YGIw4ok", "zj2R0XoFr5k"], "start_seconds": ["120", "50"], "properties": ["water, gurgles, stops", "airplane, boy, fly"], "captions_pred_video": ["footage is 
blurry and out of focus", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a toilet flushes and water splashes", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "a propeller rotates loudly and intensely"], "sample_ids": ["y2ZBGpgbhHM", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["animal, growl, bird", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["birds chirping and a dog panting", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "vehicles pass by on a roadway"], "sample_ids": ["vcmWSmvti8", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["music, man, fire", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a car is driving on the road "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["an airplane accelerates briefly", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zjTG0gaGCUI", "xKB8O8LTs6s"], "start_seconds": ["80", "70"], "properties": ["accelerates, airplane, briefly", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a jet engine roars as wind blows ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a movie?", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": 
["zVacuqSb4LI", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["blares, fades, train", "rooster, crow, background, men"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xKB8O8LTs6s", "zl9Dqx-j7q4"], "start_seconds": ["70", "6"], "properties": ["music, gunshots, explosion", "engine, laugh, loud"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of a man driving a car in the dark"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a person speaks over rustling leaves", "a vehicle engine runs and someone speaks"], "sample_ids": ["zOZleIRqZm4", "zF8yoL0rkbI"], "start_seconds": ["80", "30"], "properties": ["rustling, leaves, person", "engine, run, 
someone"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of the traffic on the street at night"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "the wind is blowing hard and water is splashing"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zj2R0XoFr5k", "tdWhHV3X25Q"], "start_seconds": ["50", "60"], "properties": ["airplane, boy, fly", "applause, audience, yells"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "water is sprayed across a hard surface"], "sample_ids": ["uYT5gxnyMWM", "sQwlkXjQabo"], "start_seconds": ["50", "10"], "properties": ["person, spray, yell", "water, spray, surface"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "spraying followed by silence"], "question": "which entity is a video of a person speaking over spraying and another person yelling?", "label": 0}, {"captions": ["an engine revs and a turning noise is made", "someone is burping continuously"], "sample_ids": ["tOSWIURC-4", "y636gklDioE"], "start_seconds": ["0", "20"], "properties": ["noise, engine, revs", "burps, burps, burps"], "captions_pred_video": [null, "a dog sitting on a red chair in front of an old telephone"], "captions_pred_audio": ["a lawn mower is running ", 
"a person burps loudly several times"], "question": "which noise is made by a human", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a clock ticks quietly and rhythmically"], "sample_ids": ["tK4VlLsNxak", "u7C-AEBQM"], "start_seconds": ["120", "30"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "ticks, rhythmic, quiet"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a ticktock of a clock"], "question": "which entity is quieter", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a vehicle accelerates before a race car idles then accelerates quickly"], "sample_ids": ["tEE3MpBt1sg", "sjlVMgdGSK0"], "start_seconds": ["50", "30"], "properties": ["drill, something, laugh", "accelerates, vehicle, race car"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["vMf1dLD6Sng", "y2bVZ7rz-5M"], "start_seconds": ["6", "280"], "properties": ["frog, bird, vocalize", "motor noise, horn, siren"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a frog croaks loudly", "a truck is honking its horn and a siren is blaring "], "question": "which entity is not a frog?", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "a train whistle keeps going off while the clickety-clack of the train on the 
rails are continuous"], "sample_ids": ["tw76HGONaKg", "ukg5L09Wpvo"], "start_seconds": ["570", "150"], "properties": ["music, click, man", "clickety-clack, train, whistle"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yDoT73BWsdA", "zFjIWfSD-4"], "start_seconds": ["10", "410"], "properties": ["engine, revs, vehicle", "People, motor, brakes"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running and air brakes hissing?", "label": 1}, {"captions": ["a person is snoring while sleeping", "a clock ticktocks"], "sample_ids": ["vJrjSeP17yE", "v-g-j2uTByM"], "start_seconds": ["40", "30"], 
"properties": ["a person is sleeping, snoring, person", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a black background with a small plane flying in the sky", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a person snoring loudly", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "waves crash against a shoreline and people speak"], "sample_ids": ["zsLxS-uLJTw", "yFB25fqfU8I"], "start_seconds": ["20", "300"], "properties": ["horn, blast, train", "wave, crash, shoreline"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a duck quacks several times", "someone whistles a tune"], "sample_ids": ["vh30P49Po6s", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["quacks, duck, several", "someone, tune, whistle"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a person whistling a song"], "question": "which entity is a human", "label": 1}, {"captions": ["a helicopter engine runs", "a duck quacks continuously"], "sample_ids": ["t5ZbXbniOWk", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["engine, helicopter, run", "quacks, continuously, duck"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a helicopter is flying overhead ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man speaks as a car is passing by", 
"water rushes and then a vehicle zooms past"], "sample_ids": ["sK4u5T8hW78", "s4Uz1Ffgo04"], "start_seconds": ["30", "100"], "properties": ["a, car, pass", "water, rushes, vehicle"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is about a vehicle zooming past?", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "an infant crying as a woman laughs"], "sample_ids": ["wDVMhEdTiVw", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["gun, shoot, water", "a, laugh, infant"], "captions_pred_video": ["a blurry image of trees and water in the forest", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "people speak as gunfire rings out"], "sample_ids": ["vcmWSmvti8", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["music, man, fire", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a man is speaking and a gun is fired"], "question": "which entity is about a man speaking as music plays before artillery is 
fired?", "label": 0}, {"captions": ["an adult woman and an adult man speak", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zTLVJCo4WEE", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["two people, adult, speak", "rustling, ducks, quack"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and crickets chirp", "a duck quacks and a woman speaks"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a man speaks followed by another man speaking outside"], "sample_ids": ["wfHeoPDLMaM", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["quacking, squawking, ducks", "two men, speak, follow"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["ducks are quacking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a group of animals?", "label": 0}, {"captions": ["a door opens and closes", 
"people applaud and hoot and chat quietly"], "sample_ids": ["vBHyYJ8pL0", "wwyfGO2J4"], "start_seconds": ["2", "90"], "properties": ["open, close, door", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a child speaks in closed space"], "sample_ids": ["u--KhUW8l1Y", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["horn, siren, life", "child, space, speak"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "water flows as men speak and yell"], "sample_ids": ["vddP56-ogds", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["water, flow, laugh", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["three men talk while wind blows and some liquid flows", "someone whistles a tune"], "sample_ids": ["vJ7JPEFhyLA", "sIXTftIuUgw"], "start_seconds": ["16", "90"], "properties": ["three men, wind, flow", "someone, tune, whistle"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a person whistling a song"], 
"question": "which entity is a musical performance", "label": 1}, {"captions": ["a motorcycle engine is idling", "paper is crumpling consistently"], "sample_ids": ["vZAqdHZ81yA", "v5cSxLaHADY"], "start_seconds": ["180", "0"], "properties": ["engine, motorcycle, idling", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["an engine is idling loudly", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a woman talking as an infant is crying", "three men talk while wind blows and some liquid flows"], "sample_ids": ["tMbMDvT50j8", "vJ7JPEFhyLA"], "start_seconds": ["12", "16"], "properties": ["a, talk, infant", "three men, wind, flow"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman talking to an infant?", "label": 0}, {"captions": ["a vehicle accelerates and squeals tires", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["yRx9txMcBl0", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["accelerates, tires, squeals", "men, talk, cars"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 
mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a person whistles and clicks a mouse", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["zCrAfDfv6-A", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["person, mouse, click", "engine, idle, woman"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a person whistles a song", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "pigeons vocalize and birds chirp"], "sample_ids": ["uiItxDsDMFI", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["wood, piece, saw", "vocalize, bird, chirp"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "of the pigeon in the cage"], "captions_pred_audio": ["a saw is being used with background noise ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a telephone rings 
and a bird vocalizes", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["skd2PphS6oI", "yajyRTUQk3U"], "start_seconds": ["190", "400"], "properties": ["ring, bird, vocalize", "a woman, something, fried"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["some men converse over an engine running", "vehicles pass by on a roadway"], "sample_ids": ["sCiy7QS1U", "tgbONvsP47Y"], "start_seconds": ["300", "0"], "properties": ["men, converse, engine", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a car accelerates and wind blows", "a person snores loudly multiple times at a close distance"], "sample_ids": ["u0TrcHhkPQ", "sSMl2vc3ek"], "start_seconds": ["20", "20"], "properties": ["accelerates, wind, blows", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["s6DESzUTGjY", "y8WEcpOlT3I"], "start_seconds": ["16", "40"], "properties": ["wind, laugh, woman", "harsh, wind, blows"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an 
aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is softer", "label": 0}, {"captions": ["dishes cling together then a man begins to speak", "pigeons vocalize and birds chirp"], "sample_ids": ["sQGXqGcwOTc", "uiS58TNyUiw"], "start_seconds": ["3", "430"], "properties": ["cling, speak, dishes", "vocalize, bird, chirp"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "of the pigeon in the cage"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a weapon fires multiple times"], "sample_ids": ["t25U-v4k4ts", "sMC07Ucy7kg"], "start_seconds": ["40", "10"], "properties": ["a, chirps, bird", "weapon, fire, multiple"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage is from a car's point of view"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "water pouring and bubbling"], 
"sample_ids": ["wnpJndXuxLc", "uyRfq-jKPpo"], "start_seconds": ["50", "50"], "properties": ["beeps, loud, whistle", "water, bubbles, pouring"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "a train horn blows as it passes by"], "sample_ids": ["u5RmF3c3Aw", "zVacuqSb4LI"], "start_seconds": ["60", "30"], "properties": ["engine, car, zoom", "horn, blows, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video 
by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train?", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["tQWGZLItBXk", "w34HjHr6gAY"], "start_seconds": ["170", "30"], "properties": ["music, person, ding", "beeps, hit, woman"], "captions_pred_video": ["worms revolution screenshots", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a beep sounds followed by a child speaking"], "question": "which entity has a person speaking?", "label": 0}, {"captions": ["a helicopter engine runs continuously", "a power tool runs and touches a surface"], "sample_ids": ["ugHJF0hfYkg", "zfvPRf3chY"], "start_seconds": ["10", "290"], "properties": ["engine, running, continuously", "power tool, run, touch"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking while a power tool is being used "], "question": "which entity is not running continuously", "label": 1}, {"captions": ["a female 
speaks softly as paper crinkles", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["xvDdE3zNf8Y", "uEU-Hg5MTN8"], "start_seconds": ["120", "27"], "properties": ["a, female, speaks", "a woman, laughs, animal"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman speaks and crumples paper", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking softly?", "label": 0}, {"captions": ["multiple people speak and children yell while water gurgles", "water flows and trickles"], "sample_ids": ["vb1fPSDI4c", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["multiple, people, yell", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a crowd of people are talking and laughing", "water is splashing and gurgling"], "question": "which entity is a video of water flowing and trickling?", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "birds chirp and objects are moved around"], "sample_ids": ["w2bYrCVLT60", "yPUYU6t3rwo"], "start_seconds": ["120", "370"], "properties": ["ducks, speak, quack", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["ducks are quacking and a man is speaking", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "a horse runs while two women talk"], "sample_ids": ["vuUVPzd2FXw", "sdvI1mHAsc"], "start_seconds": ["160", "20"], "properties": ["a, steam, release", "two women, horse, run"], "captions_pred_video": ["of the person 
cooking on the grill with a spatula", null], "captions_pred_audio": ["a man is speaking and dishes are clanging", "horses clip-clop and a woman speaks"], "question": "which entity is a horse?", "label": 1}, {"captions": ["a car accelerates and wind blows", "a toilet door squeaks as it is opened"], "sample_ids": ["u0TrcHhkPQ", "sdXV-ylviw"], "start_seconds": ["20", "190"], "properties": ["accelerates, wind, blows", "door, toilet, squeaks"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a dog barks and taps with background noise "], "question": "which door is squeaking", "label": 1}, {"captions": ["women speak and laugh as wind blows", "small dogs yip and bark sharply"], "sample_ids": ["un9VQlzgZM", "v-wcQf4BDY0"], "start_seconds": ["5", "120"], "properties": ["wind, speak, laugh", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a person is snoring while sleeping", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vJrjSeP17yE", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["a person is sleeping, snoring, person", "rustling, ducks, quack"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a duck quacks and a woman speaks"], "question": "which entity is a video", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a telephone rings followed by a woman talking"], "sample_ids": ["tK4VlLsNxak", "tGcFnX0GHI"], "start_seconds": ["120", "0"], "properties": ["An adult male speaks, dials, and speaks into 
a rotary phone", "ring, talk, woman"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a ringing phone", "label": 1}, {"captions": ["scraping and female speech with distant music", "a wooden clack accompanies nearby chirping birds"], "sample_ids": ["yHeVV-xeOxQ", "yeFvk9x0wWI"], "start_seconds": ["130", "30"], "properties": ["female, speech, music", "clack, bird, chirp"], "captions_pred_video": ["of a girl milking a goat's udder", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "birds chirp in the background as a car drives by "], "question": "which entity is accompanied by birds", "label": 1}, {"captions": ["a man speaks as a machine runs", "an infant crying as a woman laughs"], "sample_ids": ["vD6lYD1l0BY", "xhmRY9yhC7c"], "start_seconds": ["330", "20"], "properties": ["a, machine, run", "a, laugh, infant"], "captions_pred_video": ["game controller being held in the hands of the person", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["s3cTDAj31g", "ziUT9IFTkjg"], "start_seconds": ["80", "10"], "properties": ["man, talk, woman", "background, birds, rustling"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a baby is crying", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["people speak and tapping occurs", "a telephone rings followed by a woman 
talking"], "sample_ids": ["tFCUUGdREgA", "tGcFnX0GHI"], "start_seconds": ["70", "0"], "properties": ["people, tap, speak", "ring, talk, woman"], "captions_pred_video": ["a person riding a white horse in an indoor arena", null], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a man talks as several small engines run", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["u9A6VZQCZpU", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a telephone rings followed by a woman talking"], "sample_ids": ["vZAw4apG0Es", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["background, clock, ticktocks", "ring, talk, woman"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a clock ticking in the background", "label": 0}, {"captions": ["water flows as a woman laughs and a man speaks", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vddP56-ogds", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["water, flow, laugh", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["water is running and gurgling and a man is 
speaking", "a man is speaking with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a stream of water runs briefly"], "sample_ids": ["sShpyu2l4YQ", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["growl, bark, yip", "stream, water, run"], "captions_pred_video": ["the puppies are playing with a toy", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a dog is barking and growling", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["un9VQlzgZM", "su6FAOcOA8c"], "start_seconds": ["5", "4"], "properties": ["females, talk, laugh", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity is about a bus engine?", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a stream of water runs briefly"], "sample_ids": ["vhJWZheqaE", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["water drains unevenly, toilet flushes, water drains", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a toilet is flushed", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "a woman speaks happily and an animal chirps"], "sample_ids": ["xvDdE3zNf8Y", "uWAAAL4CIoc"], "start_seconds": ["120", "0"], "properties": ["a, female, speaks", "a woman, chirps, animal"], "captions_pred_video": ["of a woman in a white shirt and 
glasses holding a purple tie", null], "captions_pred_audio": ["a woman speaks and crumples paper", "a woman is speaking and a dog is barking "], "question": "which entity is more active", "label": 1}, {"captions": ["electronic beeps occur in a short series", "vehicle engines race around a track as a man commentates"], "sample_ids": ["y682ml90jGw", "sZPuqDgX2V0"], "start_seconds": ["11", "30"], "properties": ["beeps, series, electronic", "commentator, race, track"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a man is speaking and a helicopter is flying overhead "], "question": "which entity is a video of a race", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "someone is typing on a computer keyboard"], "sample_ids": ["vlS6YMeWAPo", "v0x1odnXtP0"], "start_seconds": ["40", "210"], "properties": ["noise, bleat, call", "keyboard, type, computer"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "how to make money on youtube in spanish"], "captions_pred_audio": ["a goat bleats and birds chirp", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "an infant crying frantically"], "sample_ids": ["sHbXC6na9hg", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["a person, saw, wood", "cry, infant, frantically"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "of the baby crying in the car seat"], "captions_pred_audio": ["an engine is idling and vibrating", "a baby cries loudly"], "question": "which entity is a person", "label": 0}, {"captions": ["winds blows roughly as a vehicle races past", "wind blows as people chatter quietly"], "sample_ids": ["xjvTpk2Zpr8", "xBxDz0CFVn0"], "start_seconds": ["70", "30"], "properties": ["wind, blows, vehicle", "wind, chatter, people"], "captions_pred_video": ["footage of a dhl 
plane landing on the runway", "footage is blurry and out of focus"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["a helicopter engine idles continuously", "some men converse over an engine running"], "sample_ids": ["ugHJF0hfYkg", "sCiy7QS1U"], "start_seconds": ["10", "300"], "properties": ["engine, idle, continuously", "men, converse, engine"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a jet engine spools up and takes off", "a horn rings out as a machine runs by"], "sample_ids": ["vBslzh7saPw", "slZLHwNbbt4"], "start_seconds": ["90", "300"], "properties": ["engine, spools, takes", "a, horn, run"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["shmR4OZtzqA", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["man, engine, idle", "wind, blow, vehicle"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet 
avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", null], "captions_pred_audio": ["a man speaks while a motor runs", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["people speak softly as food sizzles", "people speak as gunfire rings out"], "sample_ids": ["yhQ2Lg-7qDY", "wqTCwqVRDlk"], "start_seconds": ["130", "80"], "properties": ["food, sizzle, speak", "gunfire, ring, speak"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "an airplane engine runs"], "sample_ids": ["uPDn2BFTHk", "yVPZ2MNWpms"], "start_seconds": ["140", "0"], "properties": ["woman, laughs, speaks", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tQWGZLItBXk", "uYT5gxnyMWM"], "start_seconds": ["170", "50"], "properties": ["music, kid, speak", "female, spraying, scream"], "captions_pred_video": ["worms revolution screenshots", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects 
and sound effects play ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking and then playing music?", "label": 0}, {"captions": ["material crumbles into a microphone", "an infant crying as a woman laughs"], "sample_ids": ["vofpvUo6NAw", "xhmRY9yhC7c"], "start_seconds": ["220", "20"], "properties": ["material, crumbles, microphone", "a, laugh, infant"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a vehicles accelerate quickly and someone laughs"], "sample_ids": ["y2bVZ7rz-5M", "uWPRNLnpy7Y"], "start_seconds": ["280", "10"], "properties": ["engine, horn, siren", "accelerate, laugh, vehicle"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "is taken from a car driving down the street"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a car accelerates and revs its engine "], "question": "which vehicle is moving", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "an engine runs loudly"], "sample_ids": ["xyx6eNVEYRY", "vqZuVbG6-HI"], "start_seconds": ["380", "130"], "properties": ["loud, engine, muffles", "loud, engine, run"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "footage is blurry because it's raining outside"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["some men converse over an engine running", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sCiy7QS1U", "vb1fPSDI4c"], "start_seconds": ["300", 
"30"], "properties": ["men, converse, engine", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "several insects fly while two men talk"], "sample_ids": ["tqR406bGiE", "s-T9OVOiMLo"], "start_seconds": ["40", "330"], "properties": ["flush, water, gurgle", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a toilet?", "label": 0}, {"captions": ["a man talks while vehicles pass by", "a bird chirps in response to a woman chirping for the birds"], "sample_ids": ["sK4u5T8hW78", "uOpoD0gGXcs"], "start_seconds": ["30", "120"], "properties": ["a, man, talk", "chirps, woman, bird"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a herd of cows grazing in the field"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "birds are chirping and a man is speaking"], "question": "which entity is a response to a woman?", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "a man speaks as a car is passing by"], "sample_ids": ["smDKStoHBJo", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["a, talk, baby, cry", "a, car, pass"], 
"captions_pred_video": ["a man holding a crying baby in his arms", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a baby?", "label": 0}, {"captions": ["a man speaks while water drains", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vSeGhaZt-aI", "su6FAOcOA8c"], "start_seconds": ["50", "4"], "properties": ["water, drain, man", "engine, idle, woman"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "dishes cling together then a man begins to speak"], "sample_ids": ["vJvryTwuAV8", "sQGXqGcwOTc"], "start_seconds": ["16", "3"], "properties": ["audience, cheer, man", "cling, speak, dishes"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking to an audience?", "label": 0}, {"captions": ["a speedboat passes quickly on the water", "some men talk among st 
themselves as cars speed and race loudly"], "sample_ids": ["tjmoSi330GM", "uZesmtKZGSw"], "start_seconds": ["23", "250"], "properties": ["speed, water, boat", "men, talk, cars"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is moving faster", "label": 1}, {"captions": ["children speak and play together", "people applaud and hoot and chat quietly"], "sample_ids": ["yVVP8XvWJTo", "wwyfGO2J4"], "start_seconds": ["260", "90"], "properties": ["children, speak, play", "people, applaud, hoot"], "captions_pred_video": ["footage of a playground at a school or daycare center", null], "captions_pred_audio": ["children are speaking and breathing with background noise ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "a clock ticktocks"], "sample_ids": ["w2JXXIAdUdg", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["snoring, distance, person", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": 
["a person snoring and a dog whimpering", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a man speaks as a motor runs in the background"], "sample_ids": ["wz7N8YRy74I", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, people", "background, motor, run"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "an infant crying as a woman laughs"], "sample_ids": ["vb1fPSDI4c", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["multiple, people, yell", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a toilet flushes and water drains", "wind blowing followed by a zoom"], "sample_ids": ["sfAvvZwdLCY", "vr8ZXjEBhMQ"], "start_seconds": ["20", "150"], "properties": ["water drains, flushes, water", "wind, blow, zoom"], "captions_pred_video": ["footage of the toilet in the bathroom", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a toilet is flushed", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "vehicles pass by on a roadway"], "sample_ids": ["zkKdxzNC97Y", "tgbONvsP47Y"], "start_seconds": ["27", "0"], "properties": ["hard, surface, 
door", "pass, vehicle, roadway"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a door is opened and closed", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "pigeons vocalize and birds chirp"], "sample_ids": ["wwyfGO2J4", "uiS58TNyUiw"], "start_seconds": ["90", "430"], "properties": ["people, applaud, hoot", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a telephone rings followed by a woman talking"], "sample_ids": ["zofjfKhqLk8", "tGcFnX0GHI"], "start_seconds": ["10", "0"], "properties": ["background, metal, clings", "ring, talk, woman"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "water splashes as an animal walks through"], "sample_ids": ["tjmoSi330GM", "w1ir-sZ3Im8"], "start_seconds": ["23", "90"], "properties": ["speed, water, boat", "animal, water, splashes"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "water splashes and gurgles as people speak"], "question": "which entity is moving at a slower speed", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp 
and the music plays", "someone whistles a tune"], "sample_ids": ["sU53zg9Jp7s", "sIXTftIuUgw"], "start_seconds": ["380", "90"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "someone, tune, whistle"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", null], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a horse runs while two women talk", "a horn rings out as a machine runs by"], "sample_ids": ["sdvI1mHAsc", "slZLHwNbbt4"], "start_seconds": ["20", "300"], "properties": ["two women, horse, run", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "winds blows roughly as a vehicle races past"], "sample_ids": ["w-4gHptFNuU", "xjvTpk2Zpr8"], "start_seconds": ["21", "70"], "properties": ["engine revs, accelerates, bump", "wind, blows, vehicle"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["some people speak", "a drill runs and two people laugh"], "sample_ids": ["vbZ-0lGPneg", "tEE3MpBt1sg"], "start_seconds": ["30", "50"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "two people, laugh, drill"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage is blurry due to the smoke in the air"], "captions_pred_audio": 
["a woman is speaking and a dog is whimpering", "people are laughing breathing and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "paper is crumpling consistently"], "sample_ids": ["rwtmaKiCcQU", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["nozzle, depressed, spray can", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["spraying and people speaking", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "a baby laughs giddily and a woman laughs then speaks"], "sample_ids": ["vmrxwuAMb2I", "wjsXBsc7M40"], "start_seconds": ["40", "10"], "properties": ["a dog, inhales, exhales", "a baby laughs, a woman laughs, a woman speaks"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "footage of the baby playing with a toothbrush"], "captions_pred_audio": ["a dog barks and growls", "a baby laughs and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "water flows and trickles"], "sample_ids": ["sZvwOuuPGP0", "tB7hWb9gTuQ"], "start_seconds": ["50", "30"], "properties": ["engine, diesel, truck", "water, flow, trickle"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a medium engine is running ", "water is splashing and gurgling"], "question": "which entity is not a continuous flow", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "a train whistle keeps going 
off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["zkKdxzNC97Y", "ukg5L09Wpvo"], "start_seconds": ["27", "150"], "properties": ["hard, surface, door", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a door is opened and closed", "a train blows its whistle and blows its horn "], "question": "which entity is a train", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a man speaks as a motor runs in the background"], "sample_ids": ["zY3icUyMdh8", "xZepNM9qcRA"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "background, motor, run"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a baby cries and a woman moans", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["smDKStoHBJo", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["a, cry, woman", "applause, audience, yells"], "captions_pred_video": ["a man holding a crying baby in his arms", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["vbpKkWvfOu4", "xjvTpk2Zpr8"], "start_seconds": ["560", "70"], "properties": ["a, man, speaks", "wind, blows, vehicle"], 
"captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a jet engine roars and wind blows "], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["s59PfAghdkM", "tDlysoZiA1I"], "start_seconds": ["0", "0"], "properties": ["bird, chirp, background, horse, neigh", "animal, grunts, chirps"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "birds are chirping and a rooster is crowing "], "question": "which entity has a horse neighing?", "label": 0}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["ylpYOorfH4o", "wDVMhEdTiVw"], "start_seconds": ["410", "30"], "properties": ["engine, run, loud", "gun, shoot, water"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to 
replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["an engine starts and increases in power", "the revving of an engine throttle followed by a man speaking"], "sample_ids": ["zjTG0gaGCUI", "tezvROoo4bs"], "start_seconds": ["80", "40"], "properties": ["power, increase, engine", "audio, throttle, speaking"], "captions_pred_video": [null, "footage of a busy city street with cars parked on both sides of the road"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a car accelerates and revs while a man speaks "], "question": "which entity is about an engine?", "label": 0}, {"captions": ["water rushes and then a vehicle zooms past", "a person uses a saw to cut some wood"], "sample_ids": ["s4Uz1Ffgo04", "sHbXC6na9hg"], "start_seconds": ["100", "0"], "properties": ["water, rushes, vehicle", "a person, saw, wood"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "an engine is idling and vibrating"], "question": "which entity is a person", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "small dogs yip and bark sharply"], "sample_ids": ["sQGXqGcwOTc", "v-wcQf4BDY0"], "start_seconds": ["3", "120"], "properties": ["audio, kid, giggles", "bark, yip, sharply"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["mechanisms are operating and water is 
splashing ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "vehicles pass by on a roadway"], "sample_ids": ["vzxHnu-SFEw", "tgbONvsP47Y"], "start_seconds": ["80", "0"], "properties": ["two objects, woman, speak", "pass, vehicle, roadway"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a car is driving on the road "], "question": "which object is moving", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "waves crash against a shoreline and people speak"], "sample_ids": ["zofjfKhqLk8", "yFB25fqfU8I"], "start_seconds": ["10", "300"], "properties": ["background, metal, clings", "wave, crash, shoreline"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and laughing while water 
is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "vehicles pass by on a roadway"], "sample_ids": ["x6ijhqRY38s", "tgbONvsP47Y"], "start_seconds": ["250", "0"], "properties": ["something metal, glass, hit", "pass, vehicle, roadway"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "water pouring and bubbling"], "sample_ids": ["w2JXXIAdUdg", "uyRfq-jKPpo"], "start_seconds": ["10", "50"], "properties": ["emits, sleeping, person", "water, bubbles, pouring"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a person snoring and a dog whimpering", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "winds blows roughly as a vehicle 
races past"], "sample_ids": ["zgUgkpk78xU", "xjvTpk2Zpr8"], "start_seconds": ["70", "70"], "properties": ["clinking, humming, horn", "wind, blows, vehicle"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "children cheer as a man speaks then an audience screams"], "sample_ids": ["shmR4OZtzqA", "vJvryTwuAV8"], "start_seconds": ["30", "16"], "properties": ["man, engine, idle", "audience, cheer, man"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd"], "captions_pred_audio": ["a man speaks while a motor runs", "a man is speaking and a crowd is shouting and whooping "], "question": "which man is speaking to an audience?", "label": 1}, {"captions": 
["a motor runs steadily as a man speaks, then the motor revs twice", "winds blows roughly as a vehicle races past"], "sample_ids": ["ylpYOorfH4o", "xjvTpk2Zpr8"], "start_seconds": ["410", "70"], "properties": ["motor, run, steady", "wind, blows, vehicle"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["roadway noise occurs and a truck accelerates", "water is sprayed across a hard surface"], "sample_ids": ["tgbONvsP47Y", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["noise, truck, accelerate", "water, spray, surface"], "captions_pred_video": ["footage of a fire truck entering a garage", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a car is driving on the road ", "spraying followed by silence"], "question": "which is a liquid", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["wqZ135Ssz0", "zFjIWfSD-4"], "start_seconds": ["60", "410"], "properties": ["two men, woman, birds", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and 
ducks are quacking with wind noise in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has more people", "label": 1}, {"captions": ["a man speaks as a machine runs", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vD6lYD1l0BY", "xKB8O8LTs6s"], "start_seconds": ["330", "70"], "properties": ["a, machine, run", "music, gunfire, explosion"], "captions_pred_video": ["game controller being held in the hands of the person", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["birds tweet and squawk", "a consistent ticking pattern"], "sample_ids": ["w1mlz3Pe4fU", "sCeWURVHfOM"], "start_seconds": ["300", "30"], "properties": ["squawk, tweet, scream", "ticking, pattern, clock"], "captions_pred_video": ["of a bird in a cage", "- a close-up view of the clock's inner workings"], "captions_pred_audio": ["birds are chirping and singing", "ticking of a clock"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person is snoring while sleeping", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vJrjSeP17yE", "vb1fPSDI4c"], "start_seconds": ["40", "30"], "properties": ["a person is sleeping, snoring, person", "multiple, people, yell"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a toilet flushes and water drains", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["sfAvvZwdLCY", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["water drains, flushes, water", "a 
woman, laughs, animal"], "captions_pred_video": ["footage of the toilet in the bathroom", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["children speak and play together", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yVVP8XvWJTo", "uEU-Hg5MTN8"], "start_seconds": ["260", "27"], "properties": ["children, speak, play", "a woman, laughs, animal"], "captions_pred_video": ["footage of a playground at a school or daycare center", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a video of a woman speaking and laughing?", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vzxHnu-SFEw", "uEU-Hg5MTN8"], "start_seconds": ["80", "27"], "properties": ["two objects, woman, speak", "animal, grunts, snorts"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how 
to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a man talks followed by a woman shouting", "water flows and trickles"], "sample_ids": ["s3cTDAj31g", "tB7hWb9gTuQ"], "start_seconds": ["80", "30"], "properties": ["man, talk, woman", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and a baby is crying", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "people cheer as a vehicle engine revs"], "sample_ids": ["uJV8NDaHqqk", "xjhAnI2q6hM"], "start_seconds": ["100", "6"], "properties": ["loud, fly, chirp", "engine revs, vehicle, people"], "captions_pred_video": ["a bee hive in a wooden box", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a swarm of bees buzzing around", "a truck is revving its engine and a man is speaking "], "question": "which entity is not a person?", "label": 0}, {"captions": ["wind blows and people scream while an engine revs", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["w5W5Kqtc8E", "vlS6YMeWAPo"], "start_seconds": ["100", "40"], "properties": ["wind, engine, scream", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motorcycle engine is revving while people are 
speaking", "a woman speaks and other women and a man talk with her"], "sample_ids": ["y8dSeubCNI", "vbpKkWvfOu4"], "start_seconds": ["4", "560"], "properties": ["engine revving, people speaking, motorcycle", "a, woman, man"], "captions_pred_video": [null, "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["an engine revving and people talking in the background", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "wind blows as people chatter quietly"], "sample_ids": ["uC9dtII1KDI", "xBxDz0CFVn0"], "start_seconds": ["150", "30"], "properties": ["wind, gusts, distance", "wind, chatter, people"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xjvTpk2Zpr8", "zl9Dqx-j7q4"], "start_seconds": ["70", "6"], "properties": ["wind, blows, vehicle", "engine, laugh, loud"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a car speeding up in the distance"], "sample_ids": ["uPDn2BFTHk", "u0TrcHhkPQ"], "start_seconds": ["140", "20"], "properties": ["lady, 
laugh, baby", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a propeller rotates loudly and intensely"], "sample_ids": ["yaln9y8I7ms", "ugHJF0hfYkg"], "start_seconds": ["230", "10"], "properties": ["female, flushes, toilet", "loud, intense, propeller"], "captions_pred_video": ["footage is blurry and out of focus", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a stream of water flows as people talk and wind blows"], "sample_ids": ["tDlysoZiA1I", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["animal, grunt, chirp", "stream, water, flow"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["ylpYOorfH4o", "w5W5Kqtc8E"], "start_seconds": ["410", "100"], "properties": ["motor, run, steady", "wind, blow, vehicle"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 
youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a stream of water runs briefly"], "sample_ids": ["ugHJF0hfYkg", "x-PeY8Yb8M4"], "start_seconds": ["10", "300"], "properties": ["engine, running, continuously", "stream, water, run"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car is driving on a wet road "], "question": "which entity is running continuously", "label": 0}, {"captions": ["a vehicle engine revs and tires squeal", "someone is typing on a computer keyboard"], "sample_ids": ["yDoT73BWsdA", "v0x1odnXtP0"], "start_seconds": ["10", "210"], "properties": ["engine revs, tires squeal, vehicle", "keyboard, type, computer"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "how to make money on youtube in spanish"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a person is typing on a keyboard"], "question": "which entity is stationary", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "someone is typing on a computer keyboard"], "sample_ids": ["yswmmRZFItk", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["background, frog, croak", "keyboard, type, computer"], "captions_pred_video": ["a close up of a frog in the water", "how to make money on youtube in spanish"], "captions_pred_audio": ["a frog is croaking", "a person is typing on a keyboard"], 
"question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "water pouring and bubbling"], "sample_ids": ["wvKpEYswXO0", "uyRfq-jKPpo"], "start_seconds": ["150", "50"], "properties": ["plastic, tap, speak", "water, bubbles, pouring"], "captions_pred_video": ["of the person preparing food in the kitchen", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "water is running from a faucet"], "question": "which entity is bubbling", "label": 0}, {"captions": ["a person is burping then speaks and laughs", "a man speaks as a motor runs in the background"], "sample_ids": ["wAAkbZToh8", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["burp, laugh, speak", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man burps and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["an infant crying frantically", "wind blows as people chatter quietly"], "sample_ids": ["zwOBqeFTgiU", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], 
"properties": ["cry, infant, frantically", "wind, chatter, people"], "captions_pred_video": ["of the baby crying in the car seat", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby cries loudly", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a heavy rain falls endlessly", "winds blows roughly as a vehicle races past"], "sample_ids": ["wP8ZKrlx3oA", "xjvTpk2Zpr8"], "start_seconds": ["40", "70"], "properties": ["heavy, rain, fall", "wind, blows, vehicle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["people speak then an engine runs", "pigeons vocalize and birds chirp"], "sample_ids": ["uMTTDZ2mb4", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["engine, run, people", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["yI-KvObbDoY", "w5W5Kqtc8E"], "start_seconds": ["260", "100"], "properties": ["sound, smack, wind", "wind, blow, vehicle"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", null], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, 
{"captions": ["male speech with light ticking", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["xO-Q2BlIIPU", "sapQIQUhFc"], "start_seconds": ["30", "280"], "properties": ["male, speech, ticking", "liquid, flow, distance"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a stream is flowing in the background "], "question": "which entity is more distant", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xvDdE3zNf8Y", "tDVADusiIoc"], "start_seconds": ["120", "60"], "properties": ["A, crumple, paper", "water, radio, man"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "people applaud and hoot and chat quietly"], "sample_ids": ["tQWGZLItBXk", "wwyfGO2J4"], "start_seconds": ["170", "90"], "properties": ["voice, music, whoosh", "people, applaud, hoot"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man is filing a hard object", "a stream of water flows quickly"], "sample_ids": ["vveS8HT7Uog", "wbHTKEJZyhc"], "start_seconds": ["100", "20"], "properties": ["a man, hard, object", "stream, water, flow"], 
"captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a waterfall is flowing and people are speaking "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["yajyRTUQk3U", "tdWhHV3X25Q"], "start_seconds": ["400", "60"], "properties": ["noise, woman, speak", "applause, audience, yells"], "captions_pred_video": ["- a woman cooking in the kitchen", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking and a crowd is clapping"], "question": "which entity is a 
performance", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vXlk0lIQBFo", "zl9Dqx-j7q4"], "start_seconds": ["470", "6"], "properties": ["wind, talk, vocalize", "engine, laugh, loud"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage of a man driving a car in the dark"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["xyL9F5VrjkE", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["wind, motor, distance", "a woman, laughs, animal"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a movie", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vbZ-0lGPneg", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["a woman, a television program, a bird", "wind, blow, vehicle"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has more moving parts", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["vh30P49Po6s", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["loud, 
continuous, quacks", "sheep, baa, birds"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a duck is quacking loudly", "a goat bleats and birds chirp"], "question": "which entity is quieter", "label": 1}, {"captions": ["a stream of water flows quickly", "wind blowing followed by a zoom"], "sample_ids": ["wbHTKEJZyhc", "vr8ZXjEBhMQ"], "start_seconds": ["20", "150"], "properties": ["stream, water, flow", "wind, blow, zoom"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is moving at a slower speed", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a 
slam and an electronic beep", "a stream of water runs briefly"], "sample_ids": ["wSVhSdj0F0", "x-PeY8Yb8M4"], "start_seconds": ["10", "300"], "properties": ["horn honks, keys jingle, slam", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a drill runs and two people laugh", "three men talk while wind blows and some liquid flows"], "sample_ids": ["tEE3MpBt1sg", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["two people, laugh, drill", "three men, wind, flow"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a drill running and two people laughing?", "label": 0}, {"captions": ["wind blows as people chatter quietly", "vehicles pass by on a roadway"], "sample_ids": ["xBxDz0CFVn0", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["wind, chatter, people", "pass, vehicle, roadway"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "an airplane engine runs"], "sample_ids": ["xjhAnI2q6hM", "yVPZ2MNWpms"], "start_seconds": ["6", "0"], "properties": ["engine revs, vehicle, people", "engine, airplane, runs"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "footage of an airport with planes parked on the 
tarmac"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a car is driving by on the road "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a man speaks as a vehicle engine idles", "people speak in the background as a clock ticktocks"], "sample_ids": ["shmR4OZtzqA", "vZAw4apG0Es"], "start_seconds": ["30", "30"], "properties": ["man, engine, idle", "background, clock, ticktocks"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a man speaks while a motor runs", "a clock is ticking and people are talking"], "question": "which entity has a clock ticking in the background?", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tQWGZLItBXk", "xfaoyyzw2WU"], "start_seconds": ["170", "180"], "properties": ["voice, music, whoosh", "loud, jet engine, roar"], "captions_pred_video": ["worms revolution screenshots", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "an aircraft engine roars and a man speaks "], "question": 
"which entity is louder", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "a woman speaks happily and an animal chirps"], "sample_ids": ["smDKStoHBJo", "uWAAAL4CIoc"], "start_seconds": ["0", "0"], "properties": ["a, talk, baby, cry", "a woman, chirps, animal"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and a dog is barking "], "question": "which entity has a baby?", "label": 0}, {"captions": ["a person is whistling", "water flows as men speak and yell"], "sample_ids": ["sIXTftIuUgw", "vJ7JPEFhyLA"], "start_seconds": ["90", "16"], "properties": ["person, whistling, person", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person whistling a song", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a person speaking and yelling?", "label": 1}, {"captions": ["a man speaks as a machine runs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vD6lYD1l0BY", "tDVADusiIoc"], "start_seconds": ["330", "60"], "properties": ["a, machine, run", "water, radio, man"], "captions_pred_video": ["game controller being held in the hands of the person", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a female speaks softly as paper crinkles"], "sample_ids": ["weDbePuc-Xc", "xvDdE3zNf8Y"], "start_seconds": ["40", "120"], "properties": ["cartoon character, music, vocalize", "a, female, speaks"], "captions_pred_video": ["a cartoon frog and a 
butterfly are sitting on the ground next to each other", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a woman speaks and crumples paper"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wTideSjRFS0", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["food, sizzle, woman", "music, gunfire, explosion"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "people applaud and hoot and chat quietly"], "sample_ids": ["spJCm8tD9Zo", "wwyfGO2J4"], "start_seconds": ["90", "90"], "properties": ["snores, wheezes, sleeps", "people, applaud, hoot"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp as a train approaches", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xM4joTqDVp4", "zj2R0XoFr5k"], "start_seconds": ["160", "50"], "properties": ["bird, chirp, train", "airplane, boy, fly"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["birds are 
chirping and a train is moving ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "some tunes played by whistling"], "sample_ids": ["vf9xf3vMsGM", "u6BnG6YZqJ4"], "start_seconds": ["540", "0"], "properties": ["A man speaks while turning a water faucet on.", "tune, play, whistling"], "captions_pred_video": ["of the person washing their hands under the faucet", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a small engine idles continuously", "a train horn blows as it passes by"], "sample_ids": ["y5WII6cTH7k", "zVacuqSb4LI"], "start_seconds": ["40", "30"], "properties": ["engine, idle, continuously", "horn, blows, train"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train", "label": 1}, 
{"captions": ["a dog whimpers as someone inhales/exhales briefly", "someone snores nearby"], "sample_ids": ["vmrxwuAMb2I", "spJCm8tD9Zo"], "start_seconds": ["40", "90"], "properties": ["a dog, inhales, exhales", "someone snores, nearby, someone"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a dog barks and growls", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["water flows followed by women screaming", "race cars go around a track as a man commentates"], "sample_ids": ["w5W5Kqtc8E", "uZesmtKZGSw"], "start_seconds": ["100", "250"], "properties": ["water, flow, women", "car, track, man"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is a video of a race?", "label": 1}, {"captions": ["a toilet flushes and water drains", "paper is crumpling consistently"], "sample_ids": ["sfAvvZwdLCY", "v5cSxLaHADY"], "start_seconds": ["20", "0"], "properties": ["water drains, flushes, water", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a toilet is 
flushed", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yeFvk9x0wWI", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["clack, bird, chirp", "engine, laugh, loud"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a child speaks in closed space"], "sample_ids": ["yZmhM1HcsyE", "yW6FWLSLkx4"], "start_seconds": ["4", "40"], "properties": ["engine, roar, water", "child, space, speak"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["some men converse over an engine running", "a clock ticktocks"], "sample_ids": ["sCiy7QS1U", "v-g-j2uTByM"], "start_seconds": ["300", "30"], "properties": ["men, converse, engine", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vcmWSmvti8", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], 
"properties": ["music, man, fire", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a video of a gun shooting?", "label": 1}, {"captions": ["someone snores nearby", "a duck quacks loudly and continuously"], "sample_ids": ["spJCm8tD9Zo", "vh30P49Po6s"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "loud, continuous, quacks"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person is snoring loudly", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 0}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["uWPRNLnpy7Y", "w5W5Kqtc8E"], "start_seconds": ["10", "100"], "properties": ["accelerate, laugh, vehicle", "wind, blow, vehicle"], "captions_pred_video": ["is taken from a car driving down the street", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a motorboat is moving and people are shouting and cheering "], "question": "which vehicle is moving faster", "label": 0}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "speaking following by laughing and clapping"], "sample_ids": ["ukg5L09Wpvo", "u2f5NpsoHBg"], "start_seconds": ["150", "30"], "properties": ["clickety-clack, train, whistle", "person, laugh, clap"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "is being projected on a screen at the front of the stage"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a woman is speaking and a crowd is clapping"], 
"question": "which entity is clapping", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a stream of water flows as people talk and wind blows"], "sample_ids": ["xBxDz0CFVn0", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["wind, chatter, people", "stream, water, flow"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more likely to be in a stream", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "paper folding and crinkling"], "sample_ids": ["yRx9txMcBl0", "zPpG3RD8lSs"], "start_seconds": ["40", "20"], "properties": ["accelerates, tires, squeals", "paper, fold, crinkle"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a 
valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a car is revving its engine and skidding ", "the wind blows and a mouse clicks "], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["a man speaks as a machine runs", "a car accelerates and wind blows"], "sample_ids": ["vD6lYD1l0BY", "u0TrcHhkPQ"], "start_seconds": ["330", "20"], "properties": ["a, machine, run", "accelerates, wind, blows"], "captions_pred_video": ["game controller being held in the hands of the person", null], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vbpKkWvfOu4", "tiDFTC-5vU"], "start_seconds": ["560", "30"], "properties": ["a, man, speaks", "male, duck, laugh"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking and ducks are quacking"], "question": "which entity has a duck speaking?", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "an infant crying as a woman laughs"], "sample_ids": ["xvDdE3zNf8Y", "xhmRY9yhC7c"], "start_seconds": ["120", "20"], "properties": ["a, female, speaks", "a, laugh, infant"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "of a baby crying in a baby bouncer"], 
"captions_pred_audio": ["a woman speaks and crumples paper", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a infant makes noise and is excited", "a frog croaks as other frogs croak in the background"], "sample_ids": ["wIJK3-5y0kA", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["noise, excited, infant", "background, frog, croak"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "a close up of a frog in the water"], "captions_pred_audio": ["a baby cries and a woman speaks", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["a dog barks and whimpers", "a woman speaks and other women and a man talk with her"], "sample_ids": ["sShpyu2l4YQ", "vbpKkWvfOu4"], "start_seconds": ["0", "560"], "properties": ["barks, whimpers, dog", "a, woman, man"], "captions_pred_video": ["the puppies are playing with a toy", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking and a man is speaking"], "question": "which entity is more social", "label": 1}, {"captions": ["a toilet flushes and water drains", "a clock ticktocks briefly"], "sample_ids": ["sfAvvZwdLCY", "u7C-AEBQM"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "ticktocks, clock, ticktocks briefly"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a ticktock of a clock"], "question": "which entity is a clock?", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "small dogs yip and bark sharply"], "sample_ids": ["s4Uz1Ffgo04", "v-wcQf4BDY0"], "start_seconds": ["100", "120"], "properties": ["water, 
rushes, vehicle", "bark, yip, sharply"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["weDbePuc-Xc", "t97k0cejSQE"], "start_seconds": ["40", "250"], "properties": ["music, slaps, human", "sound, chirp, buzz"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "a bee on a purple thistle flower"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a bee buzzes and a woman speaks"], "question": "which entity has a human sniveling?", "label": 0}, {"captions": ["material crumbles into a microphone", "a child speaks in closed space"], "sample_ids": ["vofpvUo6NAw", "yW6FWLSLkx4"], "start_seconds": ["220", "40"], "properties": ["material, crumbles, microphone", "child, space, speak"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a car speeding up in the distance", "pigeons vocalize and birds chirp"], "sample_ids": ["u0TrcHhkPQ", "uiS58TNyUiw"], "start_seconds": ["20", "430"], "properties": ["distance, car, speed", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a bee is buzzing"], 
"question": "which entity is not a car?", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["tPJvjq9QePY", "x9JovgqUcs"], "start_seconds": ["40", "500"], "properties": ["animal, bleat, moo", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": ["a dog and a sheep in a barn", null], "captions_pred_audio": ["a baby cries and a man speaks", "a man speaks and types on a keyboard"], "question": "which entity is speaking", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tOj4tdLRaA", "wqZ135Ssz0"], "start_seconds": ["70", "60"], "properties": ["woman, laugh, baby", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "some men converse over an engine running"], "sample_ids": ["vKrYfzleLB8", "sCiy7QS1U"], "start_seconds": ["110", "300"], "properties": ["a, ring, gunshots", "men, converse, engine"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", null], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more calm", "label": 1}, {"captions": ["a baby laugh at a sputter", "some men converse over an engine running"], "sample_ids": ["sLUnaPT5gM8", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["laugh, sputter, baby", "men, converse, engine"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a 
man speaks while a motorcycle revs and accelerates "], "question": "which entity is a group of people", "label": 1}, {"captions": ["a person sniffles and sneezes", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["uRlbY6aoBU", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["sneezes, sniffles, person", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is sneezing ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "multiple people speak and children yell while water gurgles"], "sample_ids": ["xZepNM9qcRA", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["background, motor, run", "multiple, people, yell"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "a clock ticktocks"], "sample_ids": ["tw76HGONaKg", "v-g-j2uTByM"], "start_seconds": ["570", "30"], "properties": ["music, click, man", "ticktocks, clock, ticktocks"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of 
zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "birds chirp and objects are moved around"], "sample_ids": ["rwtmaKiCcQU", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["nozzle, depressed, spray can", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["spraying and people speaking", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["v5P-ThUCINM", "tDVADusiIoc"], "start_seconds": ["400", "60"], "properties": ["background, chirp, bird", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["wnpJndXuxLc", "uZesmtKZGSw"], "start_seconds": ["50", "250"], "properties": ["blows, vehicle, train", "men, talk, cars"], "captions_pred_video": ["footage of the train coming down the 
tracks on a snowy day", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about a train?", "label": 0}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "food is frying while a woman speaks"], "sample_ids": ["vlS6YMeWAPo", "yhQ2Lg-7qDY"], "start_seconds": ["40", "130"], "properties": ["sheep, baa, birds", "food, woman, speak"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a goat bleats and birds chirp", "a faucet is running and a man is speaking"], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a man speaks as a machine runs", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vD6lYD1l0BY", "vYkA3cfXp5Q"], "start_seconds": ["330", "30"], "properties": ["a, machine, run", "engine, accelerate, idle"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "waves crash 
against a shoreline and people speak"], "sample_ids": ["wTideSjRFS0", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["food, sizzle, woman", "wave, crash, shoreline"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a infant makes noise and is excited"], "sample_ids": ["wTjoRj1se3U", "wIJK3-5y0kA"], "start_seconds": ["390", "30"], "properties": ["engine, run, people", "noise, excited, infant"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a jet engine is running and people are talking", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "people speak as gunfire rings out"], "sample_ids": ["sLUnaPT5gM8", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["loud, laughter, intermittent", "gunfire, ring, speak"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["tapping occurs then a baby cries", "a man speaks as a car is passing by"], "sample_ids": ["wIJK3-5y0kA", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a, cry, baby", "a, car, pass"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 
6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "a man speaking with light rustling"], "sample_ids": ["sapQIQUhFc", "zOZleIRqZm4"], "start_seconds": ["280", "80"], "properties": ["water, trickles, flow", "light, rustling, man"], "captions_pred_video": [null, "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man is speaking with crickets chirping in the background"], "question": "which entity is a man speaking while water trickles and flows?", "label": 0}, {"captions": ["music plays followed by gunshots and then an explosion", "dishes cling together then a man begins to speak"], "sample_ids": ["xKB8O8LTs6s", "sQGXqGcwOTc"], "start_seconds": ["70", "3"], "properties": ["music, gunshots, explosion", "cling, speak, dishes"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a person is whistling", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sIXTftIuUgw", "y8WEcpOlT3I"], "start_seconds": ["90", "40"], "properties": ["person, whistling, person", "harsh, wind, blows"], 
"captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with wind noise in the background "], "question": "which entity is a person", "label": 0}, {"captions": ["a vehicle accelerates and squeals tires", "a infant makes noise and is excited"], "sample_ids": ["yRx9txMcBl0", "wIJK3-5y0kA"], "start_seconds": ["40", "30"], "properties": ["accelerates, tires, squeals", "noise, excited, infant"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "continuous sizzling with a woman speaking towards the end"], "sample_ids": ["w-4gHptFNuU", "ukxt9I7eMMg"], "start_seconds": ["21", "30"], "properties": ["engine revs, accelerates, bump", "continuous, woman, speaking"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "footage of a person preparing food on a stool in a kitchen"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking while food is frying in the background "], "question": "which entity is a video of a motorcycle?", "label": 0}, {"captions": ["a woman speaks as frying food sizzles", "several insects fly 
while two men talk"], "sample_ids": ["wTideSjRFS0", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["food, sizzle, woman", "several, fly, men"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more moving parts", "label": 1}, {"captions": ["birds vocalize and a man speaks", "a car revs and accelerates loudly and men and women chatter among themselves"], "sample_ids": ["v0wPrLBI3hg", "y8dSeubCNI"], "start_seconds": ["30", "4"], "properties": ["vocalize, bird, speak", "men, women, car"], "captions_pred_video": ["footage of the pigeons feeding on the ground", null], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "an engine revving and people talking in the background"], "question": "which entity is more quiet", "label": 0}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["s4Uz1Ffgo04", "y8WEcpOlT3I"], "start_seconds": ["100", "40"], "properties": ["water, rushes, motorcycle", "harsh, wind, blows"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking with wind noise in the background "], "question": "which entity is a video of a motorcycle zooming by in the distance?", "label": 0}, {"captions": ["there are rhythmical snoring nearby", "speaking following by laughing and clapping"], "sample_ids": ["ujMt0-D-x2k", "u2f5NpsoHBg"], "start_seconds": ["0", "30"], "properties": ["snoring, rhythmical, nearby", "person, laugh, clap"], 
"captions_pred_video": ["of the dog playing with a toy on the floor", "is being projected on a screen at the front of the stage"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a crowd is clapping"], "question": "which entity is more likely to be a person", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a man speaks as a car is passing by"], "sample_ids": ["zY3icUyMdh8", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "a, car, pass"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vZAw4apG0Es", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["background, tick, repeat", "loud, multiple, distance"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["su6FAOcOA8c", "wDVMhEdTiVw"], 
"start_seconds": ["4", "30"], "properties": ["engine, run, woman", "gun, shoot, water"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vzxHnu-SFEw", "vb1fPSDI4c"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "multiple, people, yell"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "distant men speak as a spray can nozzle is 
depressed"], "sample_ids": ["tDVADusiIoc", "rwtmaKiCcQU"], "start_seconds": ["60", "30"], "properties": ["wind, radio, waves", "nozzle, depressed, spray can"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "shows a man spraying paint on a wall with a spray gun"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "spraying and people speaking"], "question": "which entity is a spray can?", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a man speaks while turning a water faucet on"], "sample_ids": ["w6RTHR6AeAg", "vf9xf3vMsGM"], "start_seconds": ["40", "540"], "properties": ["call, owl, screech", "A man speaks while turning a water faucet on."], "captions_pred_video": [null, "of the person washing their hands under the faucet"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a man is speaking while water is running in the background"], "question": "which entity is a human", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "a duck quacks continuously"], "sample_ids": ["x6ijhqRY38s", "vh30P49Po6s"], "start_seconds": ["250", "30"], "properties": ["bowl, silverware, man", "quacks, continuously, duck"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "children speak and play together"], "sample_ids": ["vdoxuJn9lTc", "yVVP8XvWJTo"], "start_seconds": ["40", "260"], "properties": ["burp, loud, girl", "children, speak, play"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "footage of a playground at a school or 
daycare center"], "captions_pred_audio": ["a child speaks followed by a burp", "children are speaking and breathing with background noise "], "question": "which entity is more social", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "a woman speaks as she rubs two objects together"], "sample_ids": ["w0xsN8X18Y", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["rain, thunder, surface", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a process", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "a stream of water flows as people talk and wind blows"], "sample_ids": ["w2JXXIAdUdg", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["snoring, distance, person", "stream, water, flow"], "captions_pred_video": ["a close up shot of a person's mouth with a 
toothbrush in it", "footage is blurry and out of focus"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vh30P49Po6s", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["loud, continuous, quacks", "engine, revs, vehicle"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a duck is quacking loudly", "a race car accelerates and revs its engine "], "question": "which entity is quieter", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["xjhAnI2q6hM", "uEU-Hg5MTN8"], "start_seconds": ["6", "27"], "properties": ["engine revs, vehicle, people", "animal, grunts, snorts"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "people cheer as a vehicle engine revs"], "sample_ids": ["spYNpeN7rPY", "xjhAnI2q6hM"], "start_seconds": ["1", "6"], "properties": ["a clock, ticktock, man", "engine revs, vehicle, people"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a man speaks as a machine runs", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vD6lYD1l0BY", "wz7N8YRy74I"], "start_seconds": ["330", "30"], "properties": ["a, machine, run", "rooster, crow, background, men"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "a woman speaks happily and an animal chirps"], "sample_ids": ["xERFUeZONz8", 
"uWAAAL4CIoc"], "start_seconds": ["0", "0"], "properties": ["ring, approach, traffic", "a woman, chirps, animal"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", null], "captions_pred_audio": ["an emergency vehicle siren blares", "a woman is speaking and a dog is barking "], "question": "which entity is more calming", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["u7C-AEBQM", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["ticks, rhythmic, quiet", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a ticktock of a clock", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a person speaks briefly", "birds chirp and objects are moved around"], "sample_ids": ["zOZleIRqZm4", "yPUYU6t3rwo"], "start_seconds": ["80", "370"], "properties": ["person, talk, brief", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xjvTpk2Zpr8", "zj2R0XoFr5k"], "start_seconds": ["70", "50"], "properties": ["engine, run, wind", "airplane, boy, fly"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a woman speaks while a helicopter flies overhead "], 
"question": "which entity is flying", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a propeller rotates loudly and intensely"], "sample_ids": ["sZPuqDgX2V0", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["commentator, race, track", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a child yells and another yells", "a man speaks followed by another man speaking outside"], "sample_ids": ["vMDHu7Lxcgw", "viuTg1M-dqg"], "start_seconds": ["410", "30"], "properties": ["two, yell, child", "two men, speak, follow"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two people speaking?", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["x6ijhqRY38s", "sapQIQUhFc"], "start_seconds": ["250", "280"], "properties": ["bowl, silverware, man", "liquid, flow, distance"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking and a stream is flowing in the background "], "question": "which entity shows a man speaking?", "label": 0}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "a car accelerates and wind blows"], "sample_ids": ["w-4gHptFNuU", "u0TrcHhkPQ"], "start_seconds": ["21", "20"], "properties": ["engine revs, accelerates, bump", "accelerates, 
wind, blows"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a clock ticktocks continuously", "a woman speaks with water running"], "sample_ids": ["vlJS7LN2XyM", "wTideSjRFS0"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks continuously", "water, running, woman"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a ticktock of a clock", "a woman is speaking while water is running in the background"], "question": "which entity is a clock", "label": 0}, {"captions": ["there are rhythmical snoring nearby", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["ujMt0-D-x2k", "wSVhSdj0F0"], "start_seconds": ["0", "10"], "properties": ["snoring, rhythmical, nearby", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["of the dog playing with a toy on the floor", null], "captions_pred_audio": ["a person is snoring loudly", "a car horn honks and keys jangle with background noise "], "question": "which entity is not rhythmical", "label": 1}, {"captions": ["a man talks as several small engines run", "winds blows roughly as a vehicle races past"], "sample_ids": ["u9A6VZQCZpU", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["a, man, talk", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a jet engine roars and wind blows "], "question": "which entity is about a vehicle racing past?", "label": 1}, {"captions": ["a 
horn honks twice and keys jingle, followed by a slam and an electronic beep", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wSVhSdj0F0", "ukg5L09Wpvo"], "start_seconds": ["10", "150"], "properties": ["horn honks, keys jingle, slam", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man speaks while water drains", "someone snores nearby"], "sample_ids": ["vSeGhaZt-aI", "spJCm8tD9Zo"], "start_seconds": ["50", "90"], "properties": ["water, drain, man", "someone snores, nearby, someone"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["material crumbles into a microphone", "a clock ticktocks"], "sample_ids": ["vofpvUo6NAw", "v-g-j2uTByM"], "start_seconds": ["220", "30"], "properties": ["material, crumbles, microphone", "ticktocks, clock, ticktocks"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vzxHnu-SFEw", "wqZ135Ssz0"], "start_seconds": ["80", "60"], "properties": ["two objects, woman, speak", "two men, woman, birds"], "captions_pred_video": ["how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "a woman talks while a baby cries and a man whispers"], "sample_ids": ["yZrFNS7GFBQ", "smDKStoHBJo"], "start_seconds": ["30", "0"], "properties": ["pigeon, buzzes, insect", "a, talk, baby, cry"], "captions_pred_video": ["of the bird in the cage", "a man holding a crying baby in his arms"], "captions_pred_audio": ["an owl hoots in the background ", "a baby is crying and a woman is speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a propeller rotates loudly and intensely"], "sample_ids": ["un9VQlzgZM", "ugHJF0hfYkg"], "start_seconds": ["5", "10"], "properties": ["wind, speak, laugh", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and 
breathing in the background ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "pigeons vocalize and birds chirp"], "sample_ids": ["sHbXC6na9hg", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["a person, saw, wood", "vocalize, bird, chirp"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "of the pigeon in the cage"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a person is snoring while sleeping", "dishes cling together then a man begins to speak"], "sample_ids": ["vJrjSeP17yE", "sQGXqGcwOTc"], "start_seconds": ["40", "3"], "properties": ["a person is sleeping, snoring, person", "cling, speak, dishes"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a person snoring loudly", "mechanisms are operating and water is splashing "], "question": "which entity is about a person speaking?", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "an infant crying as a woman laughs"], "sample_ids": ["uiItxDsDMFI", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["wood, piece, saw", "a, laugh, infant"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a saw is being used with background noise ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "dishes cling together then a man begins to speak"], "sample_ids": ["sofxkNWaP0s", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["wind, engine, louder", "cling, speak, dishes"], "captions_pred_video": ["of the 
airplane taking off from the runway with the speed limit sign in the foreground", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a stream of water runs briefly", "a stream of water runs briefly"], "sample_ids": ["x-PeY8Yb8M4", "x-PeY8Yb8M4"], "start_seconds": ["300", "300"], "properties": ["stream, water, run", "stream, water, run"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a car is driving on a wet road ", "a car is driving on a wet road "], "question": "which stream of water runs briefly", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "small dogs yip and bark sharply"], "sample_ids": ["siJFXfGWgDk", "v-wcQf4BDY0"], "start_seconds": ["50", "120"], "properties": ["man, woman, vehicle", "bark, yip, sharply"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "an engine runs loudly"], "sample_ids": ["uZesmtKZGSw", "vqZuVbG6-HI"], "start_seconds": ["250", "130"], "properties": ["men, talk, cars", "loud, engine, run"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a infant makes noise and is excited"], "sample_ids": ["uYT5gxnyMWM", "wIJK3-5y0kA"], "start_seconds": ["50", "30"], "properties": ["a, scream, girl", "noise, excited, infant"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "an infant crying frantically"], "sample_ids": ["tDVADusiIoc", "zwOBqeFTgiU"], "start_seconds": ["60", "30"], "properties": ["water, radio, man", "cry, infant, frantically"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tDVADusiIoc", "wqZ135Ssz0"], "start_seconds": ["60", "60"], "properties": ["wind, radio, waves", "two men, woman, birds"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man 
is speaking while the wind is blowing and water is splashing", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "people speak as gunfire rings out"], "sample_ids": ["vSeGhaZt-aI", "wqTCwqVRDlk"], "start_seconds": ["50", "80"], "properties": ["water, bubbles, run", "gunfire, ring, speak"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["a jet engine spools up and takes off", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vBslzh7saPw", "wz7N8YRy74I"], "start_seconds": ["90", "30"], "properties": ["engine, spools, takes", "rooster, crow, background, men"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "wind blows as people chatter quietly"], "sample_ids": ["sofxkNWaP0s", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["wind, engine, louder", "wind, chatter, people"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 
1}, {"captions": ["a woman speaks as she rubs two objects together", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vzxHnu-SFEw", "vYkA3cfXp5Q"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "engine, accelerate, idle"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "an engine is idling"], "question": "which is a vehicle", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "some liquid flows while a woman laughs and man talks"], "sample_ids": ["vlS6YMeWAPo", "vddP56-ogds"], "start_seconds": ["40", "30"], "properties": ["noise, bleat, call", "liquid, laughs, man"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", null], "captions_pred_audio": ["a goat bleats and birds chirp", "water is running and gurgling and a man is speaking"], "question": "which entity has a man talking?", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds 
chirp in the background", "water flows as men speak and yell"], "sample_ids": ["ziUT9IFTkjg", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["background, birds, rustling", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["material crumbles into a microphone", "an engine runs loudly"], "sample_ids": ["vofpvUo6NAw", "vqZuVbG6-HI"], "start_seconds": ["220", "130"], "properties": ["material, crumbles, microphone", "loud, engine, run"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "footage is blurry because it's raining outside"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "a propeller rotates loudly and intensely"], "sample_ids": ["sQGXqGcwOTc", "ugHJF0hfYkg"], "start_seconds": ["3", "10"], "properties": ["audio, kid, giggles", "loud, intense, propeller"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a baby cries and a woman speaks", "an insect buzzes around continuously"], "sample_ids": ["tMbMDvT50j8", "v25l1jef3JY"], "start_seconds": ["12", "0"], "properties": ["a, cry, woman", "buzzes, continuously, insect"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "a black background with a cartoon character in the foreground"], 
"captions_pred_audio": ["a baby cries and a woman speaks", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["a dog barks and whimpers", "a propeller rotates loudly and intensely"], "sample_ids": ["sShpyu2l4YQ", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["barks, whimpers, dog", "loud, intense, propeller"], "captions_pred_video": ["the puppies are playing with a toy", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a dog is barking and growling", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["bees buzz as wind blows", "waves crash against a shoreline and people speak"], "sample_ids": ["tMJne1a4AFI", "yFB25fqfU8I"], "start_seconds": ["0", "300"], "properties": ["bees, buzz, wind", "wave, crash, shoreline"], "captions_pred_video": ["a swarm of bees on the ground", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "water running down a sink while a man is talking"], "sample_ids": ["tDlysoZiA1I", "vSeGhaZt-aI"], "start_seconds": ["0", "50"], "properties": ["animal, grunt, multiple", "water, sink, talk"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a video of a man talking?", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wDVMhEdTiVw", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["gun, shoot, 
water", "a woman, something, fried"], "captions_pred_video": ["a blurry image of trees and water in the forest", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "race cars go around a track as a man commentates"], "sample_ids": ["zofjfKhqLk8", "uZesmtKZGSw"], "start_seconds": ["10", "250"], "properties": ["background, metal, clank", "car, track, man"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and a car is revving with laughter in the background "], "question": "which is a video of a man commentating", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "water pouring and bubbling"], "sample_ids": ["y8dSeubCNI", "uyRfq-jKPpo"], "start_seconds": ["4", "50"], "properties": ["men, women, car", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an 
afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["an engine revving and people talking in the background", "water is running from a faucet"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "a man speaks as a car is passing by"], "sample_ids": ["slZLHwNbbt4", "sK4u5T8hW78"], "start_seconds": ["300", "30"], "properties": ["a, horn, run", "a, car, pass"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["tDVADusiIoc", "wyllXV6PjKo"], "start_seconds": ["60", "30"], "properties": ["water, radio, man", "a baby, a woman, a man"], "captions_pred_video": ["a person riding on the back of a 
sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman speaks and a baby cries"], "question": "which entity is a group of people", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["ugHJF0hfYkg", "su6FAOcOA8c"], "start_seconds": ["10", "4"], "properties": ["engine, running, continuously", "engine, idle, woman"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a subway train is moving "], "question": "which entity has an engine that is running continuously?", "label": 0}, {"captions": ["a stream of water flows quickly", "a helicopter engine idles continuously"], "sample_ids": ["wbHTKEJZyhc", "ugHJF0hfYkg"], "start_seconds": ["20", "10"], "properties": ["stream, water, flow", "engine, idle, continuously"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the 
background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a helicopter is flying overhead "], "question": "which entity is not moving", "label": 1}, {"captions": ["a clock ticktocks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["v-g-j2uTByM", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["ticktocks, clock, ticktocks", "water, radio, man"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a clock is ticking loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a clock?", "label": 1}, {"captions": ["a person is burping while a girl speaks", "a woman speaks over sizzling noise"], "sample_ids": ["vdoxuJn9lTc", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["person, burp, girl", "noise, woman, speak"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a child speaks followed by a burp", "a woman is speaking while food is frying in the background"], "question": "which entity is speaking over noise", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["xSKJGCItUWE", "wz7N8YRy74I"], "start_seconds": ["10", "30"], "properties": ["engine, run, boy", "rooster, crow, background, men"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": 
["a high pitched engine is running and a child speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster crow?", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sQGXqGcwOTc", "uZesmtKZGSw"], "start_seconds": ["3", "250"], "properties": ["cling, speak, dishes", "men, talk, cars"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["popping and crackling repeats as men yell and laugh", "a female speaks softly as paper crinkles"], "sample_ids": ["rqu8iB22IY", "xvDdE3zNf8Y"], "start_seconds": ["5", "120"], "properties": ["sound, repeats, laugh", "a, female, speaks"], "captions_pred_video": [null, "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a woman speaks and crumples paper"], "question": "which entity has a female speaking softly?", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["vuUVPzd2FXw", 
"w34HjHr6gAY"], "start_seconds": ["160", "30"], "properties": ["a, steam, release", "beeps, hit, woman"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a stream of water flows as people talk and wind blows"], "sample_ids": ["zofjfKhqLk8", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["noise, stop, motor", "stream, water, flow"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage is blurry and out of focus"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["people speak then an engine runs", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["uMTTDZ2mb4", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["engine, run, people", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle engine revs 
as the vehicle passes", "roadway noise occurs and a truck accelerates"], "sample_ids": ["yDoT73BWsdA", "tgbONvsP47Y"], "start_seconds": ["10", "0"], "properties": ["engine, revs, vehicle", "noise, truck, accelerate"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a car is driving on the road "], "question": "which vehicle is moving", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sG7TyPnFDR0", "xKB8O8LTs6s"], "start_seconds": ["180", "70"], "properties": ["beeps, machine, smoke alarm", "music, gunfire, explosion"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["siJFXfGWgDk", "zFjIWfSD-4"], "start_seconds": ["50", "410"], "properties": ["man, woman, vehicle", "People, motor, brakes"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", null], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has more people", "label": 1}, {"captions": ["people clap and speak in the distance", "people cheer as a vehicle engine revs"], "sample_ids": ["wwyfGO2J4", "xjhAnI2q6hM"], "start_seconds": ["90", "6"], "properties": ["clap, distance, speak", "engine revs, vehicle, people"], 
"captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["someone snores nearby", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["spJCm8tD9Zo", "yajyRTUQk3U"], "start_seconds": ["90", "400"], "properties": ["someone snores, nearby, someone", "a woman, something, fried"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a train horn blows as it passes by"], "sample_ids": ["siJFXfGWgDk", "zVacuqSb4LI"], "start_seconds": ["50", "30"], "properties": ["man, woman, vehicle", "horn, blows, train"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a man is speaking and birds are 
chirping in the background ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man speaking with light rustling", "a car accelerates and wind blows"], "sample_ids": ["zOZleIRqZm4", "u0TrcHhkPQ"], "start_seconds": ["80", "20"], "properties": ["light, rustling, man", "accelerates, wind, blows"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man talks as several small engines run", "three men talk while wind blows and some liquid flows"], "sample_ids": ["u9A6VZQCZpU", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["a, man, talk", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people", "label": 1}, {"captions": ["continuous sneezing together with speech", "three men talk while wind blows and some liquid flows"], "sample_ids": ["x4dZyf9Gbj0", "vJ7JPEFhyLA"], "start_seconds": ["130", "16"], "properties": ["continuous, sneeze, speech", "three men, wind, flow"], "captions_pred_video": ["footage is blurry and out of focus", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be a video of a person sneezing?", "label": 0}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vfYTJq7nU", "uEU-Hg5MTN8"], 
"start_seconds": ["130", "27"], "properties": ["ducks, quack, man", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a joke", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "continuous snoring"], "sample_ids": ["tw76HGONaKg", "sLkeqCDJIyw"], "start_seconds": ["570", "120"], "properties": ["A, game, keyboard", "loud, snoring, noise"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", ", what is the man doing on the couch? 
sleeping"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a person is snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a train engine runs and a horn blows", "wind blowing followed by a zoom"], "sample_ids": ["zPX9o1uDiI", "vr8ZXjEBhMQ"], "start_seconds": ["40", "150"], "properties": ["engine, horn, run", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "water flows as men speak and yell"], "sample_ids": ["sjlVMgdGSK0", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["accelerates, vehicle, race car", "water, flow, men"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a moving object", "label": 0}, {"captions": ["food fries in a pan as someone talks and cooks", "a duck quacks continuously"], "sample_ids": ["ukxt9I7eMMg", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["food, pan, cook", "quacks, continuously, duck"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "water splashes as an animal walks through"], "sample_ids": ["vddP56-ogds", "w1ir-sZ3Im8"], 
"start_seconds": ["30", "90"], "properties": ["liquid, laughs, man", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vdoxuJn9lTc", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["burp, loud, girl", "men, talk, cars"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["birds chirp as a bell rings", "waves crash against a shoreline and people speak"], "sample_ids": ["ziUT9IFTkjg", "yFB25fqfU8I"], "start_seconds": ["10", "300"], "properties": ["chirp, bell, ring", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 
1}, {"captions": ["tapping occurs then a baby cries", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wIJK3-5y0kA", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["a, cry, baby", "music, gunfire, explosion"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a baby cries and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a person snoring several times", "a frog croaks as other frogs croak in the background"], "sample_ids": ["spJCm8tD9Zo", "yswmmRZFItk"], "start_seconds": ["90", "0"], "properties": ["snore, person, several", "background, frog, croak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a close up of a frog in the water"], "captions_pred_audio": ["a person is snoring loudly", "a frog is croaking"], "question": "which entity is a croaking animal?", "label": 1}, {"captions": ["food is frying and sizzles", "someone is typing on a computer keyboard"], "sample_ids": ["zNRChLjqcU", "v0x1odnXtP0"], "start_seconds": ["220", "210"], "properties": ["food is frying, sizzles, food", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["water is running from a faucet into a sink", "a person is typing on a keyboard"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a beep occurs briefly", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xtWeJ56-U-g", "zj2R0XoFr5k"], "start_seconds": ["20", "50"], "properties": ["beep, occur, briefly", "airplane, boy, fly"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a train horn sounds and railroad crossing ring", "people speak in a closed space"], "sample_ids": ["s7knHCFW82w", "sTpirNYo8vQ"], "start_seconds": ["30", "30"], "properties": ["horn, sound, train", "people, space, speak"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "of a man taking a selfie on a bus"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a man is speaking while a car is revving and accelerating "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a man speaks as a motor runs in the background"], "sample_ids": ["sfAvvZwdLCY", "xZepNM9qcRA"], "start_seconds": ["20", "30"], "properties": ["flushes, drains, water", "background, motor, run"], "captions_pred_video": ["footage of the toilet in the bathroom", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a toilet is flushed", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a stream of water runs briefly", "a car speeding up in the distance"], "sample_ids": ["x-PeY8Yb8M4", "u0TrcHhkPQ"], "start_seconds": ["300", "20"], "properties": ["stream, water, run", "distance, car, speed"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", null], "captions_pred_audio": ["a car is driving on a wet road ", "a race car accelerates 
and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["w6RTHR6AeAg", "zj2R0XoFr5k"], "start_seconds": ["40", "50"], "properties": ["call, owl, screech", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["w0xsN8X18Y", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["rain, thunder, surface", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["w2JXXIAdUdg", "tDVADusiIoc"], "start_seconds": ["10", "60"], "properties": ["emits, sleeping, person", "water, radio, man"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["children cry and people talk", "a vehicle engine runs and wind blows before women yell"], "sample_ids": 
["xLwHe825Zs", "w5W5Kqtc8E"], "start_seconds": ["18", "100"], "properties": ["people talk, children cry, people talk", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby cries and a woman speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running?", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["yaln9y8I7ms", "xBxDz0CFVn0"], "start_seconds": ["230", "30"], "properties": ["female, flushes, toilet", "stream, water, flow"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and out of focus"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "a man speaks as a car is passing by"], "sample_ids": ["uZesmtKZGSw", "sK4u5T8hW78"], "start_seconds": ["250", "30"], "properties": ["car, track, man", "a, car, pass"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking to a car passing by?", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "water flows and trickles"], "sample_ids": ["tgbONvsP47Y", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["noise, truck, accelerate", "water, flow, trickle"], "captions_pred_video": ["footage of a fire truck entering a garage", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a car is driving on the road ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "a car accelerates and wind blows"], "sample_ids": ["vYkA3cfXp5Q", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["engine, accelerate, idle", "accelerates, wind, blows"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", null], "captions_pred_audio": ["an engine is idling", "a race car accelerates and revs its engine "], "question": "which entity is a car?", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "an airplane engine spools and people speak"], "sample_ids": ["smGI3C1NZc", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["water, drain, toilet", "airplane, engine, spool"], "captions_pred_video": [null, "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a toilet is flushed", "a jet engine is running and people are talking"], "question": "which entity is a machine", "label": 1}, {"captions": ["a train engine runs and a horn blows", "a vehicle engine runs 
and wind blows before women yell"], "sample_ids": ["zPX9o1uDiI", "w5W5Kqtc8E"], "start_seconds": ["40", "100"], "properties": ["engine, horn, run", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["wz7N8YRy74I", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["rooster, crow, background, men", "two men, woman, birds"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", null], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more birds", "label": 1}, {"captions": ["speaking following by laughing and clapping", "vehicles pass by on a roadway"], "sample_ids": ["u2f5NpsoHBg", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["person, laugh, clap", "pass, vehicle, roadway"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "small dogs yip and bark sharply"], "sample_ids": ["zCrAfDfv6-A", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["person, mouse, click", "bark, yip, sharply"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a 
person whistles a song", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["material crumbles into a microphone", "vehicles pass by on a roadway"], "sample_ids": ["vofpvUo6NAw", "tgbONvsP47Y"], "start_seconds": ["220", "0"], "properties": ["material, crumbles, microphone", "pass, vehicle, roadway"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "footage of a fire truck entering a garage"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["zgUgkpk78xU", "zFjIWfSD-4"], "start_seconds": ["70", "410"], "properties": ["clinking, humming, horn", "People, motor, brakes"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a train?", "label": 0}, {"captions": ["white noise and birds chirping", "several insects fly while two men talk"], "sample_ids": ["wRBHTgrbiwg", "s-T9OVOiMLo"], "start_seconds": ["50", "330"], "properties": ["noise, white, chirping", "several, fly, men"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking while insects are buzzing in the background "], "question": 
"which entity is more active", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "paper is crumpling consistently"], "sample_ids": ["xzKKf9bKNUo", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["background, noise, snoring", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a person snoring loudly", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a man speaks as a motor runs in the background"], "sample_ids": ["xyL9F5VrjkE", "xZepNM9qcRA"], "start_seconds": ["20", "30"], "properties": ["wind, motor, distance", "background, motor, run"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["u5RmF3c3Aw", "tDVADusiIoc"], "start_seconds": ["60", "60"], "properties": ["engine, car, zoom", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a man speaking with light rustling", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": 
["zOZleIRqZm4", "uYT5gxnyMWM"], "start_seconds": ["80", "50"], "properties": ["light, rustling, man", "female, spraying, scream"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a baby is crying"], "question": "which entity is a woman?", "label": 1}, {"captions": ["bees buzz and wind blows", "water pouring and bubbling"], "sample_ids": ["tMJne1a4AFI", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["bees buzz, wind blows, bees", "water, bubbles, pouring"], "captions_pred_video": ["a swarm of bees on the ground", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a swarm of bees buzzing around", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["w5W5Kqtc8E", "vfYTJq7nU"], "start_seconds": ["100", "130"], "properties": ["wind, engine, scream", "rustling, ducks, quack"], "captions_pred_video": [null, null], 
"captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a duck quacks and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["an audience gives applause", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["x6iCUDmRpKQ", "yajyRTUQk3U"], "start_seconds": ["38", "400"], "properties": ["applause, audience, give", "a woman, something, fried"], "captions_pred_video": ["a black background with the moon and stars in the sky", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a group of people are clapping and cheering", "a woman is speaking while food is frying in the background"], "question": "which entity is a demonstration", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a woman talking as an infant is crying"], "sample_ids": ["vfYTJq7nU", "tMbMDvT50j8"], "start_seconds": ["130", "12"], "properties": ["rustling, ducks, quack", "a, talk, infant"], "captions_pred_video": [null, "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity is a person talking to an infant?", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a man speaks followed by another man speaking outside"], "sample_ids": ["siJFXfGWgDk", "viuTg1M-dqg"], "start_seconds": ["50", "30"], "properties": ["a, bird, vehicle", "two men, speak, follow"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": 
["a man speaks and a rooster crows while men talk in the background", "a clock ticktocks"], "sample_ids": ["wz7N8YRy74I", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, men", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "a car speeding up in the distance"], "sample_ids": ["y8WEcpOlT3I", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["wind, speak, buffeting", "distance, car, speed"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a airplane flies overhead as a woman speaks", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zj2R0XoFr5k", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["airplane, fly, woman", "female, spraying, scream"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a woman speaking?", "label": 0}, {"captions": ["a saw finishes running as metal clings in the background", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zofjfKhqLk8", "vb1fPSDI4c"], "start_seconds": ["10", "30"], "properties": ["background, metal, clings", "multiple, people, yell"], 
"captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vbpKkWvfOu4", "vJ7JPEFhyLA"], "start_seconds": ["560", "16"], "properties": ["a, man, speaks", "three men, wind, flow"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "paper is crumpling consistently"], "sample_ids": ["yNtRmrn0io8", "v5cSxLaHADY"], "start_seconds": ["210", "0"], "properties": ["storm, distance, strike", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a house in the middle of the night", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["rain falls and thunder roars", "paper is crumpled and crinkled"], "question": "which entity is not a storm?", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "paper folding and crinkling"], "sample_ids": ["shmR4OZtzqA", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["man, engine, idle", "paper, fold, crinkle"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 
2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man speaks while a motor runs", "the wind blows and a mouse clicks "], "question": "which is not a vehicle", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "three men talk while wind blows and some liquid flows"], "sample_ids": ["w0xsN8X18Y", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["music, surface, rain", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while a motorboat is moving 
in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more liquid flowing", "label": 1}, {"captions": ["a motor runs and stops, and animals squawk and croak", "a frog croaks as other frogs croak in the background"], "sample_ids": ["s4tUs779vBA", "yswmmRZFItk"], "start_seconds": ["160", "0"], "properties": ["a, sound, stop", "background, frog, croak"], "captions_pred_video": ["game in 10 words or less - screenshot 1", "a close up of a frog in the water"], "captions_pred_audio": ["a car is revving and a man is speaking ", "a frog is croaking"], "question": "which entity has more frogs croaking in the background", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "someone is typing on a computer keyboard"], "sample_ids": ["sd7xVssqlw", "v0x1odnXtP0"], "start_seconds": ["50", "210"], "properties": ["accelerates, tires, squealing", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a person is typing on a keyboard"], "question": "which entity is stationary", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "a woman speaks as she rubs two objects together"], "sample_ids": ["sTpirNYo8vQ", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["a, tone, fast", "two objects, woman, speak"], "captions_pred_video": ["of a man taking a selfie on a bus", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which woman speaks as she rubs two objects together?", "label": 1}, {"captions": ["a infant makes noise and is excited", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wIJK3-5y0kA", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["noise, excited, infant", "loud, laughter, intermittent"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a baby cries and a woman speaks", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaking with light rustling", "a person snores loudly multiple times at a close distance"], "sample_ids": ["zOZleIRqZm4", "sSMl2vc3ek"], "start_seconds": ["80", "20"], "properties": ["light, rustling, man", "loud, multiple, distance"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["xfudFO976zE", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["animal, bleats, cry", "female, spraying, scream"], 
"captions_pred_video": ["footage is blurry and shaky", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "a person snores loudly multiple times at a close distance"], "sample_ids": ["u6jIvCtKarQ", "sSMl2vc3ek"], "start_seconds": ["70", "20"], "properties": ["a, man, speaks", "loud, multiple, distance"], "captions_pred_video": ["footage of a person using a blender on a stove top", null], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "paper is crumpling consistently"], "sample_ids": ["uiItxDsDMFI", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["wood, piece, saw", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a saw is being used with background noise ", "paper is crumpled and crinkled"], "question": "which object is being crumpled", "label": 0}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a man speaks over intermittent keyboard taps"], "sample_ids": ["yLy-WycbVVE", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["background, people, talk", "audio, man, keyboard"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the 
legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a man speaks and types on a computer keyboard "], "question": "which entity has a more calming background", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "wind blows as people chatter quietly"], "sample_ids": ["zY3icUyMdh8", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "wind, chatter, people"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage is blurry and out of focus"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "a propeller rotates loudly and intensely"], "sample_ids": ["wsHBIgzs9Fs", "ugHJF0hfYkg"], "start_seconds": ["50", "10"], "properties": ["horn, continuous, buzzing", "loud, intense, propeller"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, 
{"captions": ["a loud engine muffles a man as he speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xyx6eNVEYRY", "tDVADusiIoc"], "start_seconds": ["380", "60"], "properties": ["loud, engine, muffles", "water, radio, man"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a man speaks as a machine runs", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vD6lYD1l0BY", "wDVMhEdTiVw"], "start_seconds": ["330", "30"], "properties": ["a, machine, run", "gun, shoot, water"], "captions_pred_video": ["game controller being held in the hands of the person", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a gun", "label": 1}, {"captions": ["an insect buzzes around continuously", "someone snores nearby"], "sample_ids": ["v25l1jef3JY", "spJCm8tD9Zo"], "start_seconds": ["0", "90"], "properties": ["buzzes, continuously, insect", "someone snores, nearby, someone"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a person is snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a horn rings out as a machine runs by"], "sample_ids": ["vW4x7S1VfQc", "slZLHwNbbt4"], "start_seconds": ["150", "300"], "properties": ["clacking, oil, woman", "a, horn, run"], "captions_pred_video": 
["footage of a person cooking fish in a frying pan on a stove top", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["food sizzles in a frying pan", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["a man speaks as a machine runs", "people applaud and hoot and chat quietly"], "sample_ids": ["vD6lYD1l0BY", "wwyfGO2J4"], "start_seconds": ["330", "90"], "properties": ["a, machine, run", "people, applaud, hoot"], "captions_pred_video": ["game controller being held in the hands of the person", null], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a person sniffles and sneezes", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["uRlbY6aoBU", "y8WEcpOlT3I"], "start_seconds": ["0", "40"], "properties": ["sneezes, sniffles, person", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking with wind noise in the background "], "question": "which entity is more likely to be a person", "label": 0}, {"captions": ["a bird is chirping and tweeting a bird song", "a toilet flushes and a female speaks"], "sample_ids": ["wPz6QRAkEb4", "yaln9y8I7ms"], "start_seconds": ["60", "230"], "properties": ["chirps, tweets, song", "female, flushes, toilet"], "captions_pred_video": ["a bird in a cage on top of a pole", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping in the background ", "a toilet flushes and a man speaks"], "question": "which entity is not a bird?", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a woman speaks and other women and a man talk with her"], "sample_ids": ["w2M4i1mklOA", "vbpKkWvfOu4"], 
"start_seconds": ["30", "560"], "properties": ["alarm, gears, turn", "a, woman, man"], "captions_pred_video": ["footage of an antique clock", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["smDKStoHBJo", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["a, talk, baby, cry", "People, motor, brakes"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a man speaks as a car is passing by"], "sample_ids": ["sQwlkXjQabo", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["water, spray, surface", "a, car, pass"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking with background noise and breathing sounds "], "question": "which entity is 
moving", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a heavy rain falls endlessly"], "sample_ids": ["zALy31PjDl0", "wP8ZKrlx3oA"], "start_seconds": ["21", "40"], "properties": ["a man, a vehicle, a horn", "heavy, rain, fall"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a heavy rain is falling on a surface"], "question": "which entity is a weather event", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["wqUmIEzuNz4", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["frog, bird, vocalize", "men, talk, cars"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a cat meows and rustles", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a vehicle engine revs as the vehicle passes", "a woman speaks as she rubs two objects together"], "sample_ids": ["yDoT73BWsdA", "vzxHnu-SFEw"], "start_seconds": ["10", "80"], "properties": ["engine, revs, vehicle", "two objects, woman, speak"], "captions_pred_video": ["a man driving a race car with a helmet on the 
steering wheel", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "three men talk while wind blows and some liquid flows"], "sample_ids": ["slZLHwNbbt4", "vJ7JPEFhyLA"], "start_seconds": ["300", "16"], "properties": ["train, horn, sound", "three men, wind, flow"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a train?", "label": 1}, {"captions": ["electronic beeps occur in a short series", "paper folding and crinkling"], "sample_ids": ["y682ml90jGw", "zPpG3RD8lSs"], "start_seconds": ["11", "20"], "properties": ["beeps, series, electronic", "paper, fold, crinkle"], "captions_pred_video": [null, 
"how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a beeping sound is being made ", "the wind blows and a mouse clicks "], "question": "which entity is not a series of beeps", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["yYEVLuqEytU", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["grunt, slurp, background", "a, scream, girl"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a male speaks and another male speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["viuTg1M-dqg", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["two males, speaking, male", "rooster, crow, background, men"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of the sun shining through the clouds on a cloudy 
day"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "birds chirp and objects are moved around"], "sample_ids": ["sG7TyPnFDR0", "yPUYU6t3rwo"], "start_seconds": ["180", "370"], "properties": ["beeps, machine, smoke alarm", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "people speak as gunfire rings out"], "sample_ids": ["wjsXBsc7M40", "wqTCwqVRDlk"], "start_seconds": ["10", "80"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "gunfire, ring, speak"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["a woman speaks with water running", "some men converse over an engine running"], "sample_ids": ["wTideSjRFS0", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["water, running, woman", "men, converse, engine"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a woman speaking with water running?", "label": 0}, {"captions": ["water splashing 
and wind blowing as a powerful engine roars", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["yZmhM1HcsyE", "uZesmtKZGSw"], "start_seconds": ["4", "250"], "properties": ["engine, roar, water", "men, talk, cars"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more likely to be in a race", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a man speaks with another voice speaking in the background"], "sample_ids": ["vZAw4apG0Es", "u21-Z5gJCB8"], "start_seconds": ["30", "30"], "properties": ["background, tick, repeat", "background, voice, man"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking in the background?", "label": 0}, {"captions": ["birds chirp and a dog breathes heavily", "people applaud and hoot and chat quietly"], "sample_ids": ["y2ZBGpgbhHM", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["dog, chirp, breathe", 
"people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds chirping and a dog panting", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a car is passing by", "white noise and snoring with some rustling in the background"], "sample_ids": ["sK4u5T8hW78", "xzKKf9bKNUo"], "start_seconds": ["30", "10"], "properties": ["a, car, pass", "background, noise, snoring"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "shows a woman laying on a bed with her eyes closed and her mouth open"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a person snoring loudly"], "question": "which entity has a car passing by", "label": 0}, {"captions": ["a propeller rotates loudly and intensely", "people cheer as a vehicle engine revs"], "sample_ids": ["ugHJF0hfYkg", "xjhAnI2q6hM"], "start_seconds": ["10", "6"], "properties": ["loud, intense, propeller", "engine revs, vehicle, people"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a helicopter is flying overhead ", "a truck is revving its engine and a man is speaking "], "question": "which is louder", "label": 0}, {"captions": ["ticking continues without interruption", "some tunes played by whistling"], "sample_ids": ["v-g-j2uTByM", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["ticking, continuous, clock", "tune, play, whistling"], 
"captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a clock is ticking loudly", "a person whistling a song"], "question": "which entity is not continuous", "label": 1}, {"captions": ["a helicopter engine runs", "an insect buzzes around continuously"], "sample_ids": ["t5ZbXbniOWk", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["engine, helicopter, run", "buzzes, continuously, insect"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a helicopter is flying overhead ", "a fly is buzzing around a microphone "], "question": "which entity is not a helicopter?", "label": 1}, {"captions": ["a woman and man are speaking", "someone whistles a tune"], "sample_ids": ["vbpKkWvfOu4", "sIXTftIuUgw"], "start_seconds": ["560", "90"], "properties": ["two people, speaking, woman, man", "someone, tune, whistle"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a person whistling a song"], "question": "which entity is a single person", "label": 1}, {"captions": ["goats bleat and metal clings", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tH17JPjDPnc", "sSMl2vc3ek"], "start_seconds": ["260", "20"], "properties": ["bleat, metal, clings", "loud, multiple, distance"], "captions_pred_video": ["feed of the goats eating hay in the barn", null], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a person snoring loudly"], "question": "which entity is 
louder", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a woman speaks as she rubs two objects together"], "sample_ids": ["siJFXfGWgDk", "vzxHnu-SFEw"], "start_seconds": ["50", "80"], "properties": ["a, bird, vehicle", "two objects, woman, speak"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["an adult woman and an adult man speak", "wind blows as people chatter quietly"], "sample_ids": ["zTLVJCo4WEE", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["two people, adult, speak", "wind, chatter, people"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", 
"label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "birds tweet and squawk"], "sample_ids": ["yI-KvObbDoY", "w1mlz3Pe4fU"], "start_seconds": ["260", "300"], "properties": ["sound, smack, wind", "squawk, tweet, scream"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "of a bird in a cage"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "birds are chirping and singing"], "question": "which entity is a bird", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "a car accelerates and wind blows"], "sample_ids": ["vzceMbklWc", "u0TrcHhkPQ"], "start_seconds": ["180", "20"], "properties": ["water, faucet, sink", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "wind blowing followed by a zoom"], "sample_ids": ["y8dSeubCNI", "vr8ZXjEBhMQ"], "start_seconds": ["4", "150"], "properties": ["engine revving, people speaking, motorcycle", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["an engine revving and people talking in the background", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a woman and man are speaking", "a car accelerates and wind blows"], "sample_ids": ["vbpKkWvfOu4", "u0TrcHhkPQ"], "start_seconds": ["560", "20"], "properties": ["two people, speaking, woman, man", "accelerates, wind, blows"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a race car accelerates and revs its engine "], "question": "which is not a person", "label": 1}, {"captions": ["a cat meows and children speak", "a motor vehicle roars, drowning out people speaking in the background"], "sample_ids": ["x5cuQjOdM3E", "s4Uz1Ffgo04"], "start_seconds": ["30", "100"], "properties": ["cat, speak, children", "roars, background, people speaking"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["paper is crumpling consistently", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["v5cSxLaHADY", "uZesmtKZGSw"], "start_seconds": ["0", "250"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "men, talk, cars"], "captions_pred_video": ["footage of the person holding a pair of scissors", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["paper is crumpled and crinkled", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["water quietly rushes by while 
birds chirp in the background", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sYITalLZjj4", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["water, rushes, background, birds", "airplane, boy, fly"], "captions_pred_video": ["two ducks are swimming in the water near each other", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["wind blows and birds chirp", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a wooden clack accompanies nearby chirping birds"], "sample_ids": ["sNB8zxXneIM", "yeFvk9x0wWI"], "start_seconds": ["20", "30"], "properties": ["several, quack, cocks", "clack, bird, chirp"], "captions_pred_video": ["a group of geese in a cage", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "birds chirp in the background as a car drives by "], "question": "which entity is a bird?", "label": 1}, {"captions": ["a person whistles a meandering tune", "a airplane flies overhead as a woman speaks"], "sample_ids": ["uFoga8sHpiw", "zj2R0XoFr5k"], "start_seconds": ["90", "50"], "properties": ["person, tune, whistle", "airplane, fly, woman"], "captions_pred_video": ["footage of a bird in a cage", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person whistles a song", "a woman speaks while a helicopter flies overhead "], "question": "which entity is moving", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "winds blows roughly as a vehicle races past"], "sample_ids": ["vqZuVbG6-HI", "xjvTpk2Zpr8"], "start_seconds": ["130", "70"], "properties": ["background, male, female", "wind, blows, vehicle"], 
"captions_pred_video": ["footage is blurry because it's raining outside", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["an airplane engine runs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yVPZ2MNWpms", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["engine, airplane, runs", "female, spraying, scream"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car is driving by on the road ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a male speaks and another male speaks", "a man speaks followed by another man speaking outside"], "sample_ids": ["viuTg1M-dqg", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["two males, speaking, male", "two men, speak, follow"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two men speaking?", "label": 0}, {"captions": ["waves crash against a shoreline and wind blows", "a car speeding up in the distance"], "sample_ids": ["zdYdyF9-m8U", "u0TrcHhkPQ"], "start_seconds": ["7", "20"], "properties": ["wind, crash, shoreline", "distance, car, speed"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", null], "captions_pred_audio": ["waves crash and wind blows ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["multiple ducks quack 
continuously", "water flows and trickles"], "sample_ids": ["wfHeoPDLMaM", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["multiple, quack, continuously", "water, flow, trickle"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["ducks are quacking", "water is splashing and gurgling"], "question": "which entity is a source of water", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "some tunes played by whistling"], "sample_ids": ["w9lpbUn0hPc", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["male, wind, rustling", "tune, play, whistling"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a motorcycle engine revs then 
accelerates before hitting a bump", "a propeller rotates loudly and intensely"], "sample_ids": ["w-4gHptFNuU", "ugHJF0hfYkg"], "start_seconds": ["21", "10"], "properties": ["engine revs, accelerates, bump", "loud, intense, propeller"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a horn rings out as a machine runs by"], "sample_ids": ["vh30P49Po6s", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["loud, continuous, quacks", "a, horn, run"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a duck is quacking loudly", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is not continuous", "label": 1}, {"captions": ["a person snoring", "a frog croaks as other frogs croak in the background"], "sample_ids": ["t8tv5YRMJUg", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["a person, snore, loud", "background, frog, croak"], "captions_pred_video": ["of a man getting his face licked by another man", "a close up of a frog in the water"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a frog is croaking"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "a toilet flushes and a female speaks"], "sample_ids": ["tqR406bGiE", "yaln9y8I7ms"], "start_seconds": ["40", "230"], "properties": ["flush, water, gurgle", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a toilet is flushed", "a toilet flushes and a man speaks"], 
"question": "which entity has a female speaking?", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["zkKdxzNC97Y", "xfaoyyzw2WU"], "start_seconds": ["27", "180"], "properties": ["hard, surface, door", "loud, jet engine, roar"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a door is opened and closed", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "vehicles pass by on a roadway"], "sample_ids": ["vms5XGTDVQc", "tgbONvsP47Y"], "start_seconds": ["220", "0"], "properties": ["paper, crumpled, crinkled", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "footage of a fire truck entering a garage"], "captions_pred_audio": ["paper is crumpled and crinkled", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a drill runs and two people laugh", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tEE3MpBt1sg", "sSMl2vc3ek"], "start_seconds": ["50", "20"], "properties": ["two people, laugh, drill", "loud, multiple, distance"], "captions_pred_video": ["footage is blurry due to the smoke in the air", null], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman talking as an infant is crying", "a stream runs then someone speaks"], "sample_ids": ["tMbMDvT50j8", "wbHTKEJZyhc"], "start_seconds": ["12", "20"], "properties": ["a, talk, infant", "stream, run, someone"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage of a river 
in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["a baby cries and a woman speaks", "a waterfall is flowing and people are speaking "], "question": "which entity is a stream?", "label": 1}, {"captions": ["water flows followed by women screaming", "a toilet flushes and a female speaks"], "sample_ids": ["w5W5Kqtc8E", "yaln9y8I7ms"], "start_seconds": ["100", "230"], "properties": ["water, flow, women", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "a man speaks as a vehicle engine idles"], "sample_ids": ["vb1fPSDI4c", "shmR4OZtzqA"], "start_seconds": ["30", "30"], "properties": 
["multiple, people, yell", "man, engine, idle"], "captions_pred_video": [null, "shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a man speaks while a motor runs"], "question": "which entity has a vehicle engine idle?", "label": 1}, {"captions": ["white noise and birds chirping", "water splashes as an animal walks through"], "sample_ids": ["wRBHTgrbiwg", "w1ir-sZ3Im8"], "start_seconds": ["50", "90"], "properties": ["noise, white, chirping", "animal, water, splashes"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a helicopter engine idles continuously", "a clock ticktocks"], "sample_ids": ["ugHJF0hfYkg", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["engine, idle, continuously", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a helicopter is flying overhead ", "a clock is ticking loudly"], "question": "which entity is ticking 
continuously", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a frog croaks as other frogs croak in the background"], "sample_ids": ["sShpyu2l4YQ", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["growl, bark, yip", "background, frog, croak"], "captions_pred_video": ["the puppies are playing with a toy", "a close up of a frog in the water"], "captions_pred_audio": ["a dog is barking and growling", "a frog is croaking"], "question": "which entity is a solitary animal", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "wind blowing followed by a zoom"], "sample_ids": ["zsLxS-uLJTw", "vr8ZXjEBhMQ"], "start_seconds": ["20", "150"], "properties": ["horn, blast, train", "wind, blow, zoom"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["u5RmF3c3Aw", "w5W5Kqtc8E"], "start_seconds": ["60", "100"], "properties": ["engine, car, zoom", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a car zooming by?", "label": 0}, {"captions": ["children speak and play together", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yVVP8XvWJTo", "wz7N8YRy74I"], "start_seconds": ["260", "30"], "properties": ["children, speak, play", "rooster, crow, background, men"], "captions_pred_video": ["footage of a playground at a school or daycare center", "footage of the sun shining through the clouds on a cloudy day"], 
"captions_pred_audio": ["children are speaking and breathing with background noise ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity shows a rooster crow?", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "water splashes as an animal walks through"], "sample_ids": ["xV7Mg1QucSc", "w1ir-sZ3Im8"], "start_seconds": ["14", "90"], "properties": ["alarm, ticktocks, laughs", "animal, water, splashes"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["w34HjHr6gAY", "xV7Mg1QucSc"], "start_seconds": ["30", "14"], "properties": ["beeps, hit, woman", "alarm, ticktocks, laughs"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "an alarm clock ticks and a woman laughs"], "question": "which entity has a man laugh?", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "waves crash against a shoreline and people speak"], "sample_ids": ["wRBHTgrbiwg", "yFB25fqfU8I"], "start_seconds": 
["50", "300"], "properties": ["birds, chirp, cooing", "wave, crash, shoreline"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a person surfing in the ocean"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["white noise and birds chirping", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wRBHTgrbiwg", "zj2R0XoFr5k"], "start_seconds": ["50", "50"], "properties": ["noise, white, chirping", "airplane, boy, fly"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "a clock ticktocks in wind"], "sample_ids": ["uiS58TNyUiw", "yVumC9TGknc"], "start_seconds": ["430", "30"], "properties": ["audio, man, speaking", "ticktocks, clock, wind"], "captions_pred_video": ["of the pigeon in the cage", "game title screen of the game shadow of the colossus on sony playstation 2"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a series of beeps and chirps"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks as a machine runs", "water runs into a sink while men speak"], "sample_ids": ["vD6lYD1l0BY", "vzceMbklWc"], "start_seconds": ["330", "180"], "properties": ["a, machine, run", "water, sink, run"], "captions_pred_video": ["game controller being held in the hands of the person", null], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "water is running and a man is speaking"], "question": "which 
entity is a man speaking as a machine runs?", "label": 0}, {"captions": ["a clock alarm sounds and gears turn", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["w2M4i1mklOA", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["alarm, gears, turn", "airplane, boy, fly"], "captions_pred_video": ["footage of an antique clock", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["ukxt9I7eMMg", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["food, pan, cook", "rustling, ducks, quack"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a wooden clack accompanies nearby chirping birds"], "sample_ids": ["ylpYOorfH4o", "yeFvk9x0wWI"], "start_seconds": ["410", "30"], "properties": ["engine, run, loud", "clack, bird, chirp"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube 
how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a man is speaking and an engine is revving", "birds chirp in the background as a car drives by "], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a door opens and closes"], "sample_ids": ["yajyRTUQk3U", "vBHyYJ8pL0"], "start_seconds": ["400", "2"], "properties": ["noise, woman, speak", "open, close, door"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is more silent", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "someone is typing on a computer keyboard"], "sample_ids": ["vuUVPzd2FXw", "v0x1odnXtP0"], "start_seconds": ["160", "210"], "properties": ["a, steam, release", "keyboard, type, computer"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a woman sneezes then speaks", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["x4dZyf9Gbj0", "xfaoyyzw2WU"], "start_seconds": ["130", "180"], "properties": ["sneezes, speaks, woman", "loud, jet engine, roar"], "captions_pred_video": ["footage is blurry and out of focus", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a woman sneezes and speaks", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, 
{"captions": ["a man speaks over a radio as wind blows and water splashes", "a duck quacks continuously"], "sample_ids": ["tDVADusiIoc", "vh30P49Po6s"], "start_seconds": ["60", "30"], "properties": ["water, radio, man", "quacks, continuously, duck"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a duck is quacking loudly"], "question": "which entity is speaking", "label": 0}, {"captions": ["multiple birds vocalize and wind blows", "an infant crying as a woman laughs"], "sample_ids": ["uoGVs9yUqY4", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["multiple, vocalize, wind", "a, laugh, infant"], "captions_pred_video": ["for how to make a wooden shed door youtube", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "birds chirp and an owl hoots before a man speaks briefly"], "sample_ids": ["yks4cLgIDMc", "wRBHTgrbiwg"], "start_seconds": ["170", "50"], "properties": ["background, speaking, child", "bird, owl, speak"], "captions_pred_video": ["footage of two kids wrestling on the floor", "of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a man is speaking and a child is crying", "birds are chirping and insects are buzzing"], "question": "which entity has a bird speaking?", "label": 1}, {"captions": ["humming of idling and revving engine with a man speaking", "a woman speaks as she rubs two objects together"], "sample_ids": ["wqADXCzngMw", "vzxHnu-SFEw"], "start_seconds": ["340", "80"], "properties": ["audio, humming, revving", "two objects, woman, speak"], "captions_pred_video": 
["of a man working on a vintage volkswagen beetle", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["xjhAnI2q6hM", "ukg5L09Wpvo"], "start_seconds": ["6", "150"], "properties": ["engine revs, vehicle, people", "clickety-clack, train, whistle"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "birds chirp and objects are moved around"], "sample_ids": ["sapQIQUhFc", "yPUYU6t3rwo"], "start_seconds": 
["280", "370"], "properties": ["liquid, flow, distance", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaking with light rustling", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zOZleIRqZm4", "tdWhHV3X25Q"], "start_seconds": ["80", "60"], "properties": ["light, rustling, man", "applause, audience, yells"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaking with light rustling", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["zOZleIRqZm4", "y2bVZ7rz-5M"], "start_seconds": ["80", "280"], "properties": ["light, rustling, man", "motor noise, horn, siren"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a truck is honking its horn and a siren is blaring "], "question": "which entity is more quiet", "label": 0}, {"captions": ["wind blows strongly and a young man speaks", "a vehicle engine accelerates and wind blows"], "sample_ids": ["vs65y4qmyBE", "wudZTNBtVqc"], "start_seconds": ["340", "60"], "properties": ["wind, blows, strongly", "accelerates, engine, wind"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage is of a parking lot with cars parked in it"], 
"captions_pred_audio": ["a heavy engine is running and men are speaking ", "a car accelerates and revs its engine "], "question": "which entity is about a vehicle engine accelerating and wind blowing?", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "a piece of wood is being placed down and sawed"], "sample_ids": ["tiDFTC-5vU", "uiItxDsDMFI"], "start_seconds": ["30", "30"], "properties": ["male, duck, laugh", "wood, piece, saw"], "captions_pred_video": [null, "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a saw is being used with background noise "], "question": "which entity is being cut", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vZAw4apG0Es", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["people, clock, converse", "engine, laugh, loud"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a clock is ticking and people are talking", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "people cheer as a vehicle engine revs"], "sample_ids": ["w34HjHr6gAY", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["beeps, hit, woman", "engine revs, vehicle, people"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of 
oz 1995 the wizard of oz 1995 the wizard of oz", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["x6ijhqRY38s", "xKB8O8LTs6s"], "start_seconds": ["250", "70"], "properties": ["something metal, glass, hit", "music, gunfire, explosion"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "someone snores nearby"], "sample_ids": ["se87d6yxEOA", "spJCm8tD9Zo"], "start_seconds": ["10", "90"], "properties": ["run, whistle, pass", "someone snores, nearby, someone"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a person is snoring loudly"], "question": "which is quieter", "label": 0}, {"captions": ["a person is burping while a girl speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vdoxuJn9lTc", "tdWhHV3X25Q"], "start_seconds": ["40", "60"], "properties": ["person, burp, girl", "applause, audience, yells"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "a man is talking to another man on a stage in front of a microphone"], 
"captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["wyllXV6PjKo", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["a baby, a woman, a man", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman is speaking and a baby is crying"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yZrFNS7GFBQ", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["pigeon, buzzes, insect", "three men, wind, flow"], "captions_pred_video": ["of the bird in the cage", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a pigeon?", "label": 0}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "people speak as gunfire rings out"], "sample_ids": ["wvKpEYswXO0", "wqTCwqVRDlk"], "start_seconds": ["150", "80"], "properties": ["water, tap, run", "gunfire, ring, speak"], "captions_pred_video": ["of the person preparing food in the kitchen", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "people speak in the background 
as a clock ticktocks"], "sample_ids": ["sU53zg9Jp7s", "vZAw4apG0Es"], "start_seconds": ["380", "30"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "background, clock, ticktocks"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a clock is ticking and people are talking"], "question": "which entity has a clock ticktocking?", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "men speak and a nozzle sprays liquid"], "sample_ids": ["yaln9y8I7ms", "wRV8yMk886E"], "start_seconds": ["230", "0"], "properties": ["female, flushes, toilet", "liquid, spray, nozzle"], "captions_pred_video": ["footage is blurry and out of focus", "two cars are parked in a parking lot at night"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a man speaks followed by a loud burst"], "question": "which entity is a machine", "label": 1}, {"captions": ["wind blows strongly", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["w8uLijTqtlU", "tiDFTC-5vU"], "start_seconds": ["70", "30"], "properties": ["wind, blows, strongly", "male, duck, laugh"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking and ducks are quacking"], "question": "which entity is more likely to be a natural phenomenon", "label": 0}, {"captions": ["children cheer as a man speaks then an audience screams", "people applaud and hoot and chat quietly"], "sample_ids": ["vJvryTwuAV8", "wwyfGO2J4"], "start_seconds": ["16", "90"], "properties": ["audience, cheer, man", "people, applaud, hoot"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", null], "captions_pred_audio": ["a man is speaking and a crowd is shouting 
and whooping ", "people are clapping and speaking with background noise "], "question": "which entity shows a quieter audience", "label": 1}, {"captions": ["an engine runs loudly", "a toilet flushes and a female speaks"], "sample_ids": ["vqZuVbG6-HI", "yaln9y8I7ms"], "start_seconds": ["130", "230"], "properties": ["loud, engine, run", "female, flushes, toilet"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage is blurry and out of focus"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a toilet flushes and a man speaks"], "question": "which entity is silent", "label": 1}, {"captions": ["a person is snoring while sleeping", "dogs barking and whimpering"], "sample_ids": ["vJrjSeP17yE", "tIY7qOV3rEM"], "start_seconds": ["40", "0"], "properties": ["a person is sleeping, snoring, person", "barking, whimpering, dog"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a dog is standing in the middle of a dirt road in the woods"], "captions_pred_audio": ["a person snoring loudly", "a dog is barking and a cat is meowing"], "question": "which entity is a dog", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "several insects fly while two men talk"], "sample_ids": ["wTjoRj1se3U", "s-T9OVOiMLo"], "start_seconds": ["390", "330"], "properties": ["engine, run, people", "several, fly, men"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a jet engine is running and people are talking", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more likely to be in a nature setting", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "water is sprayed across a hard surface"], "sample_ids": ["xKB8O8LTs6s", "sQwlkXjQabo"], "start_seconds": ["70", "10"], 
"properties": ["music, gunshots, explosion", "water, spray, surface"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a infant makes noise and is excited", "bees buzz and wind blows"], "sample_ids": ["wIJK3-5y0kA", "tMJne1a4AFI"], "start_seconds": ["30", "0"], "properties": ["noise, excited, infant", "bees buzz, wind blows, bees"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "a swarm of bees on the ground"], "captions_pred_audio": ["a baby cries and a woman speaks", "a swarm of bees buzzing around"], "question": "which entity is buzzing", "label": 1}, {"captions": ["a man speaking with light rustling", "ticking continues without interruption"], "sample_ids": ["zOZleIRqZm4", "v-g-j2uTByM"], "start_seconds": ["80", "30"], "properties": ["light, rustling, man", "ticking, continuous, clock"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a clock is ticking loudly"], "question": "which entity is continuous", "label": 1}, {"captions": ["children speak and play together", "vehicles pass by on a roadway"], "sample_ids": ["yVVP8XvWJTo", "tgbONvsP47Y"], "start_seconds": ["260", "0"], "properties": ["children, speak, play", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a playground at a school or daycare center", "footage of a fire truck entering a garage"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a fly buzzes around loudly as birds 
chirp", "a car accelerates and wind blows"], "sample_ids": ["uJV8NDaHqqk", "u0TrcHhkPQ"], "start_seconds": ["100", "20"], "properties": ["loud, fly, chirp", "accelerates, wind, blows"], "captions_pred_video": ["a bee hive in a wooden box", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "waves crash against a shoreline and people speak"], "sample_ids": ["rwTERCUno", "yFB25fqfU8I"], "start_seconds": ["90", "300"], "properties": ["engine, idle, sputter", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a jet engine spools up and takes off", "some men converse over an engine running"], "sample_ids": ["vBslzh7saPw", "sCiy7QS1U"], "start_seconds": ["90", "300"], "properties": ["engine, spools, takes", "men, converse, engine"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "people cheer as a vehicle engine revs"], "sample_ids": ["vJ7JPEFhyLA", "xjhAnI2q6hM"], "start_seconds": ["16", "6"], "properties": ["three men, wind, flow", "engine revs, vehicle, people"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a truck is revving its engine and a man is 
speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["an infant crying frantically", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["zwOBqeFTgiU", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["cry, infant, frantically", "water, radio, man"], "captions_pred_video": ["of the baby crying in the car seat", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a baby cries loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a train horn sounds as it passes by", "people cheer as a vehicle engine revs"], "sample_ids": ["ukg5L09Wpvo", "xjhAnI2q6hM"], "start_seconds": ["150", "6"], "properties": ["sound, train, horn", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["wind noise makes sound into a microphone", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["w8uLijTqtlU", "w5W5Kqtc8E"], "start_seconds": ["70", "100"], "properties": ["wind, microphone, noise", "wind, blow, vehicle"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a recording of wind noise?", "label": 0}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["w-4gHptFNuU", "sLUnaPT5gM8"], "start_seconds": ["21", "0"], "properties": ["engine revs, accelerates, bump", "loud, 
laughter, intermittent"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["engines sputter roughly and tires squeal", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["zhx6hoYrHeI", "sLUnaPT5gM8"], "start_seconds": ["160", "0"], "properties": ["engine, sputter, rough", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more like a scream", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vlJS7LN2XyM", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["background, clocks, ticking", "applause, audience, yells"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "someone whistles a song"], "sample_ids": ["tiDFTC-5vU", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["male, duck, laugh", "someone, song, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a person whistling 
a song"], "question": "which entity is a person", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a woman speaks as she rubs two objects together"], "sample_ids": ["sZPuqDgX2V0", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["commentator, race, track", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "multiple people speak and children yell while water gurgles"], "sample_ids": ["slZLHwNbbt4", "vb1fPSDI4c"], "start_seconds": ["300", "30"], "properties": ["clap, distance, horn", "multiple, people, yell"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", null], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a crowd of people are 
talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks and is typing on a keyboard", "a vehicle engine accelerating then running on idle"], "sample_ids": ["x9JovgqUcs", "vYkA3cfXp5Q"], "start_seconds": ["500", "30"], "properties": ["a, man, speaks, keyboard", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man speaks and types on a keyboard", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["people speak softly as food sizzles", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yhQ2Lg-7qDY", "zFjIWfSD-4"], "start_seconds": ["130", "410"], "properties": ["food, sizzle, speak", "People, motor, brakes"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a motor?", "label": 1}, {"captions": ["a small engine spits as it runs", "multiple motorcycles pass by as a man speaks"], "sample_ids": ["sZvwOuuPGP0", "zcDwZ6W7E3E"], "start_seconds": ["50", "180"], "properties": ["spits, engine, runs", "man, speak, motorcycles"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "2 people riding motorcycles down a mountain road with trees lining the sides of the road"], "captions_pred_audio": ["a medium engine is running ", "a man is speaking while a car accelerates and revs its engine "], "question": "which entity is a video of multiple motorcycles passing by as a man speaks?", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["u--KhUW8l1Y", "su6FAOcOA8c"], "start_seconds": ["0", "4"], 
"properties": ["horn, siren, life", "engine, idle, woman"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "an infant crying as a woman laughs"], "sample_ids": ["zofjfKhqLk8", "xhmRY9yhC7c"], "start_seconds": ["10", "20"], "properties": ["background, metal, clings", "a, laugh, infant"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "small dogs yip and bark sharply"], "sample_ids": ["xfudFO976zE", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["animal, bleats, cry", "bark, yip, sharply"], "captions_pred_video": ["footage is blurry and shaky", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a dog barks and growls"], "question": "which animal is more aggressive", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["v0x1odnXtP0", "yks4cLgIDMc"], "start_seconds": ["210", "170"], "properties": ["keyboard, type, computer", "background, speaking, child"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["a person is typing on a keyboard", "a man is speaking and a child is crying"], "question": "which entity has a child 
shouting in the background", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "birds chirp and objects are moved around"], "sample_ids": ["vbZ-0lGPneg", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["a woman, a television program, a bird", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "insects buzz and a man speaks"], "question": "which entity has more birds", "label": 1}, {"captions": ["leaves rustle while man speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["zOZleIRqZm4", "vJ7JPEFhyLA"], "start_seconds": ["80", "16"], "properties": ["leaves, rustle, speak", "three men, wind, flow"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a person burps loudly for a long time nearby", "an infant crying frantically"], "sample_ids": ["vf44CgrjT0A", "zwOBqeFTgiU"], "start_seconds": ["20", "30"], "properties": ["loud, long, person", "cry, infant, frantically"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "of the baby crying in the car seat"], "captions_pred_audio": ["a loud burp", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a child speaks in closed space"], "sample_ids": ["sSMl2vc3ek", "yW6FWLSLkx4"], "start_seconds": ["20", "40"], "properties": ["a person, laughs, snores", "child, space, 
speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 0}, {"captions": ["several beeps are followed by a hit and a woman talking", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["w34HjHr6gAY", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["beeps, hit, woman", "engine, revs, vehicle"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "an engine runs loudly"], "sample_ids": ["sLUnaPT5gM8", "vqZuVbG6-HI"], "start_seconds": ["0", "130"], "properties": ["loud, laughter, intermittent", "loud, engine, run"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as crickets sing", "a chime of a clock followed by various tones of ticking with come 
clinking"], "sample_ids": ["ryFDPxgDOGc", "uqFtmnhuqA8"], "start_seconds": ["570", "30"], "properties": ["a, crickets, sing", "a, b, c"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "shows a clock on the wall of a room with a person standing in front of it"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "mechanisms are ticking and a hammer is striking "], "question": "which entity is a clock?", "label": 1}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "a toilet flushes and a female speaks"], "sample_ids": ["smDKStoHBJo", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["a, infant, speaking", "female, flushes, toilet"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a toilet flushes and a man speaks"], "question": "which entity is about a toilet?", "label": 1}, {"captions": ["a man speaks as a machine runs", "an small aircraft engine runs and a boy speaks"], "sample_ids": ["vD6lYD1l0BY", "xSKJGCItUWE"], "start_seconds": ["330", "10"], "properties": ["a, machine, run", "engine, run, boy"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a high pitched engine is running and a child speaks"], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["people speak in a closed space", "water is sprayed across a hard surface"], "sample_ids": ["sTpirNYo8vQ", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["people, space, speak", "water, spray, surface"], "captions_pred_video": ["of a man taking a selfie on a bus", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking while 
a car is revving and accelerating ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "two men speak as a buffeting wind blows"], "sample_ids": ["wwyfGO2J4", "y8WEcpOlT3I"], "start_seconds": ["90", "40"], "properties": ["people, applaud, hoot", "wind, speak, buffeting"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "a toilet flushes and a female speaks"], "sample_ids": ["sjlVMgdGSK0", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["accelerates, vehicle, race car", "female, flushes, toilet"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage is blurry and out of focus"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a toilet flushes and a man speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a door slams shut roughly", "people applaud and hoot and chat quietly"], "sample_ids": ["zkKdxzNC97Y", "wwyfGO2J4"], "start_seconds": ["27", "90"], "properties": ["a door, slams, shut", "people, applaud, hoot"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["zFjIWfSD-4", "yks4cLgIDMc"], "start_seconds": ["410", "170"], "properties": ["People, motor, brakes", "background, speaking, child"], "captions_pred_video": 
[null, "footage of two kids wrestling on the floor"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background?", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a man speaks uses a drill"], "sample_ids": ["zcDwZ6W7E3E", "x5eIC7S0fbg"], "start_seconds": ["180", "60"], "properties": ["man, speak, motorcycles", "A man is speaking, uses a drill, and is a tool"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a person in surgical gloves is using a needle to remove a small object from a tooth"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking and using a power tool "], "question": "which man is speaking", "label": 1}, {"captions": ["a jet engine spools up and takes off", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vBslzh7saPw", "tdWhHV3X25Q"], "start_seconds": ["90", "60"], "properties": ["engine, spools, takes", "applause, audience, yells"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["u--KhUW8l1Y", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["horn, siren, life", "airplane, boy, fly"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a fire 
truck siren blares and a horn blows ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "a frog croaks as other frogs croak in the background"], "sample_ids": ["spJCm8tD9Zo", "yswmmRZFItk"], "start_seconds": ["90", "0"], "properties": ["snores, wheezes, sleeps", "background, frog, croak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a close up of a frog in the water"], "captions_pred_audio": ["a person is snoring loudly", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["yZrFNS7GFBQ", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["pigeon, buzzes, insect", "men, talk, cars"], "captions_pred_video": ["of the bird in the cage", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a woman speaks happily and an animal chirps"], "sample_ids": ["wfHeoPDLMaM", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["quacking, squawking, ducks", "a woman, chirps, animal"], "captions_pred_video": ["ducks in a 
barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "a woman is speaking and a dog is barking "], "question": "which entity is a bird?", "label": 0}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wvKpEYswXO0", "vfYTJq7nU"], "start_seconds": ["150", "130"], "properties": ["water, tap, run", "rustling, ducks, quack"], "captions_pred_video": ["of the person preparing food in the kitchen", null], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a duck quacks and a woman speaks"], "question": "which entity is about water?", "label": 0}, {"captions": ["a person sniffles and sneezes", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["uRlbY6aoBU", "wDVMhEdTiVw"], "start_seconds": ["0", "30"], "properties": ["sneezes, sniffles, person", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is sneezing ", "a gun is fired followed 
by splashing and a person sneezing"], "question": "which entity is not followed by water sloshing nearby?", "label": 0}, {"captions": ["a toilet flushes and water drains", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sfAvvZwdLCY", "tdWhHV3X25Q"], "start_seconds": ["20", "60"], "properties": ["water drains, flushes, water", "applause, audience, yells"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["y1saVTXsKwc", "xKB8O8LTs6s"], "start_seconds": ["80", "70"], "properties": ["a, dog, talk", "music, gunfire, explosion"], "captions_pred_video": ["a dog playing with a pink ball", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a dog barks and a man speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["zl9Dqx-j7q4", "vlS6YMeWAPo"], "start_seconds": ["6", "40"], "properties": ["engine, laugh, loud", "sheep, baa, birds"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a jet engine roars ", "a goat bleats and birds chirp"], "question": "which entity is followed by a man laughing", "label": 0}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "a man speaks followed by another man speaking outside"], "sample_ids": ["yZrFNS7GFBQ", "viuTg1M-dqg"], "start_seconds": ["30", "30"], 
"properties": ["pigeon, buzzes, insect", "two men, speak, follow"], "captions_pred_video": ["of the bird in the cage", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a single person speaking?", "label": 0}, {"captions": ["a motorcycle engine is idling", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vZAqdHZ81yA", "vYkA3cfXp5Q"], "start_seconds": ["180", "30"], "properties": ["engine, motorcycle, idling", "engine, accelerate, idle"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["an engine is idling loudly", "an engine is idling"], "question": "which entity has an engine that is idling", "label": 0}, {"captions": ["bees buzz and wind blows", "frogs croak and vocalize"], "sample_ids": ["tMJne1a4AFI", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["bees buzz, wind blows, bees", "croak, vocalize, frog"], "captions_pred_video": ["a swarm of bees on the ground", "a close up of a frog in the water"], "captions_pred_audio": ["a swarm of bees buzzing around", "a frog is croaking"], "question": "which animal is more likely to be a frog", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "birds twitter and chirp and clatter"], "sample_ids": ["vJvryTwuAV8", "yeFvk9x0wWI"], "start_seconds": ["16", "30"], "properties": ["audience, cheer, man", "chirp, twitter, clatter"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "birds chirp in the background as a car drives by "], "question": "which 
entity is more quiet", "label": 1}, {"captions": ["some people speak", "a car speeding up in the distance"], "sample_ids": ["vbZ-0lGPneg", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "distance, car, speed"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a man speaks as a motor runs in the background"], "sample_ids": ["uWPRNLnpy7Y", "xZepNM9qcRA"], "start_seconds": ["10", "30"], "properties": ["accelerate, laugh, vehicle", "background, motor, run"], "captions_pred_video": ["is taken from a car driving down the street", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a person speaks over rustling leaves", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["zOZleIRqZm4", "w5W5Kqtc8E"], "start_seconds": ["80", "100"], "properties": ["rustling, leaves, person", "wind, blow, vehicle"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["birds chirp and objects are moved around", "an airplane flies overhead as a woman speaks"], "sample_ids": ["yPUYU6t3rwo", "zj2R0XoFr5k"], "start_seconds": ["370", "50"], "properties": ["birds chirp, objects are moved around, birds", "airplane, fly, 
overhead"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["insects buzz and a man speaks", "a woman speaks while a helicopter flies overhead "], "question": "which object is moving around", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "waves crash against a shoreline and people speak"], "sample_ids": ["sa6TLVbooCc", "yFB25fqfU8I"], "start_seconds": ["240", "300"], "properties": ["people, laugh, child", "wave, crash, shoreline"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more calm", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "birds chirp and objects are moved around"], "sample_ids": ["vzxHnu-SFEw", "yPUYU6t3rwo"], "start_seconds": ["80", "370"], "properties": ["two objects, woman, speak", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["water flows and trickles", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tB7hWb9gTuQ", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["water, flow, trickle", "loud, multiple, distance"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", null], "captions_pred_audio": ["water is splashing and gurgling", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "several insects fly while two men talk"], "sample_ids": ["vmrxwuAMb2I", "s-T9OVOiMLo"], "start_seconds": ["40", "330"], "properties": ["a dog, inhales, exhales", "several, fly, men"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a dog barks and growls", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a still image?", "label": 0}, {"captions": ["someone whistles a tune", "a stream of water runs briefly"], "sample_ids": ["sIXTftIuUgw", "x-PeY8Yb8M4"], "start_seconds": ["90", "300"], "properties": ["someone, tune, whistle", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a person whistling a song", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["someone is burping continuously", "a male is 
speaking and a duck quacks as others laugh"], "sample_ids": ["y636gklDioE", "tiDFTC-5vU"], "start_seconds": ["20", "30"], "properties": ["burps, burps, burps", "male, duck, laugh"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", null], "captions_pred_audio": ["a person burps loudly several times", "a man is speaking and ducks are quacking"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["smDKStoHBJo", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["a, talk, baby, cry", "rooster, crow, background, men"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "wind blowing followed by a zoom"], "sample_ids": ["slZLHwNbbt4", "vr8ZXjEBhMQ"], "start_seconds": ["300", "150"], "properties": ["a, horn, run", "wind, blow, zoom"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom of", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sa6TLVbooCc", "y8WEcpOlT3I"], "start_seconds": ["240", "40"], "properties": ["people, laugh, child", "harsh, wind, blows"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "on how to use a sewing machine youtube"], 
"captions_pred_audio": ["a woman is speaking and a child is coughing", "a man is speaking with wind noise in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "water pouring and bubbling"], "sample_ids": ["wRBHTgrbiwg", "uyRfq-jKPpo"], "start_seconds": ["50", "50"], "properties": ["bird, owl, speak", "water, bubbles, pouring"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "water is running from a faucet"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["uWPRNLnpy7Y", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["accelerate, laugh, vehicle", "female, spraying, scream"], "captions_pred_video": ["is taken from a car driving down the street", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a person 
sniffles and then sneezes in the distance", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["uRlbY6aoBU", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["a, distance, sneeze", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is sneezing ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 0}, {"captions": ["someone snores nearby", "people speak as gunfire rings out"], "sample_ids": ["spJCm8tD9Zo", "wqTCwqVRDlk"], "start_seconds": ["90", "80"], "properties": ["someone snores, nearby, someone", "gunfire, ring, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["motors runs briefly and tires screech", "water runs into a sink while men speak"], "sample_ids": ["yRx9txMcBl0", "vzceMbklWc"], "start_seconds": ["40", "180"], "properties": ["motors, tires, screech", "water, sink, run"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "water is running and a man is speaking"], "question": "which entity is a video of a 
sink?", "label": 1}, {"captions": ["electronic beeps occur in a short series", "a person snores loudly multiple times at a close distance"], "sample_ids": ["y682ml90jGw", "sSMl2vc3ek"], "start_seconds": ["11", "20"], "properties": ["beeps, series, electronic", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wAAkbZToh8", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["burp, laugh, speak", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man burps and a woman speaks", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a person speaking and laughing?", "label": 0}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["y2bVZ7rz-5M", "zl9Dqx-j7q4"], "start_seconds": ["280", "6"], "properties": ["engine, horn, siren", "engine, laugh, loud"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a jet engine roars "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["motors rev and run loudly as a person laughs", "water is sprayed across a hard surface"], "sample_ids": ["zl9Dqx-j7q4", "sQwlkXjQabo"], "start_seconds": ["6", "10"], "properties": ["motors rev, laugh, loudly", "water, spray, surface"], "captions_pred_video": ["footage of a man driving a car in the dark", "a close-up of a red car with water droplets 
on the hood"], "captions_pred_audio": ["a jet engine roars ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "an insect buzzes around continuously"], "sample_ids": ["zj2R0XoFr5k", "v25l1jef3JY"], "start_seconds": ["50", "0"], "properties": ["airplane, fly, overhead", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a fly is buzzing around a microphone "], "question": "which entity is flying", "label": 0}, {"captions": ["a baby cries and a woman speaks", "some men converse over an engine running"], "sample_ids": ["tMbMDvT50j8", "sCiy7QS1U"], "start_seconds": ["12", "300"], "properties": ["a, cry, woman", "men, converse, engine"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a baby?", "label": 0}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a whistling owl calls out repeatedly and insects screech"], "sample_ids": ["wy1eKjR7KC0", "w6RTHR6AeAg"], "start_seconds": ["30", "40"], "properties": ["people, talk, distance", "call, owl, screech"], "captions_pred_video": ["two police officers riding motorcycles down the street", null], "captions_pred_audio": ["a man is speaking and a siren is going off", "an owl hoots and mechanisms operate "], "question": "which entity is a bird?", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["uJV8NDaHqqk", "uZesmtKZGSw"], 
"start_seconds": ["100", "250"], "properties": ["loud, fly, chirp", "men, talk, cars"], "captions_pred_video": ["a bee hive in a wooden box", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a toilet flushes and water drains", "an infant crying as a woman laughs"], "sample_ids": ["sfAvvZwdLCY", "xhmRY9yhC7c"], "start_seconds": ["20", "20"], "properties": ["water drains, flushes, water", "a, laugh, infant"], "captions_pred_video": ["footage of the toilet in the bathroom", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a toilet is flushed", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a heavy rain falls endlessly", "someone whistles a tune"], "sample_ids": ["wP8ZKrlx3oA", "sIXTftIuUgw"], "start_seconds": ["40", "90"], "properties": ["heavy, rain, fall", "someone, tune, whistle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", null], "captions_pred_audio": ["a heavy rain is falling on a surface", "a person whistling a song"], "question": "which is not a musical instrument", "label": 0}, {"captions": ["a person sneezes followed by another person speaking", "pigeons vocalize and birds chirp"], "sample_ids": ["t8CV69hcvF0", "uiS58TNyUiw"], 
"start_seconds": ["210", "430"], "properties": ["person, sneeze, follow", "vocalize, bird, chirp"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "of the pigeon in the cage"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "multiple people speak and children yell while water gurgles"], "sample_ids": ["t25U-v4k4ts", "vb1fPSDI4c"], "start_seconds": ["40", "30"], "properties": ["bees buzz, birds chirp, man speaks", "multiple, people, yell"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", null], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a man is filing a hard object", "some men converse over an engine running"], "sample_ids": ["vveS8HT7Uog", "sCiy7QS1U"], "start_seconds": ["100", "300"], "properties": ["a man, hard, object", "men, converse, engine"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which object is harder to file", "label": 0}, {"captions": ["water splashes and wind noise is made into a microphone", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["sDSppXIlJrs", "w5W5Kqtc8E"], "start_seconds": ["27", "100"], "properties": ["microphone, water, wind", "wind, blow, vehicle"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", null], "captions_pred_audio": ["the wind is blowing and water is splashing", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a 
vehicle engine running?", "label": 1}, {"captions": ["a man speaks as horns blow", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["tHyNqRyK34A", "s7knHCFW82w"], "start_seconds": ["24", "30"], "properties": ["a, man, speaks", "blow horn, get close, train"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a train is blowing its horn and its wheels are squealing "], "question": "which entity is about a train blowing its horn?", "label": 1}, {"captions": ["a car accelerates and wind blows", "people applaud and hoot and chat quietly"], "sample_ids": ["u0TrcHhkPQ", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["accelerates, wind, blows", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be at a concert", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a infant makes noise and is excited"], "sample_ids": ["zY3icUyMdh8", "wIJK3-5y0kA"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "noise, excited, infant"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "water splashes as an animal walks through"], "sample_ids": ["xzKKf9bKNUo", "w1ir-sZ3Im8"], "start_seconds": ["10", "90"], "properties": ["background, 
noise, snoring", "animal, water, splashes"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a person snoring loudly", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a person speaks over rustling leaves", "a stream of water flows as people talk and wind blows"], "sample_ids": ["zOZleIRqZm4", "xBxDz0CFVn0"], "start_seconds": ["80", "30"], "properties": ["rustling, leaves, person", "stream, water, flow"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["zofjfKhqLk8", "vbZ-0lGPneg"], "start_seconds": ["10", "30"], "properties": ["noise, stop, motor", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a woman is speaking and a dog is whimpering"], "question": "which entity is a television program?", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["v0x1odnXtP0", "tDVADusiIoc"], "start_seconds": ["210", "60"], "properties": ["keyboard, type, computer", "water, radio, man"], "captions_pred_video": ["how to make money on youtube in spanish", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a person is typing on a keyboard", 
"a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a person?", "label": 0}, {"captions": ["some men converse over an engine running", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sCiy7QS1U", "sSMl2vc3ek"], "start_seconds": ["300", "20"], "properties": ["men, converse, engine", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a person snoring loudly"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zdYdyF9-m8U", "xKB8O8LTs6s"], "start_seconds": ["7", "70"], "properties": ["wind, crash, shoreline", "music, gunfire, explosion"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["waves crash and wind blows ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["people speak as gunfire rings out", "a stream of water runs briefly"], "sample_ids": ["wqTCwqVRDlk", "x-PeY8Yb8M4"], "start_seconds": ["80", "300"], "properties": ["gunfire, ring, speak", "stream, water, run"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "paper is crumpling consistently"], "sample_ids": ["yYJksgsxx5U", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["audio, woman, silverware", "paper is crumpling, paper is 
white, paper is crumpling"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "people speak as gunfire rings out"], "sample_ids": ["uPDn2BFTHk", "wqTCwqVRDlk"], "start_seconds": ["140", "80"], "properties": ["lady, laugh, baby", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "a man speaks as a vehicles passes by then a woman speaks"], "sample_ids": ["yDoT73BWsdA", "siJFXfGWgDk"], "start_seconds": ["10", "50"], "properties": ["engine revs, tires squeal, vehicle", "man, woman, vehicle"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and birds are chirping in the background "], "question": "which entity is a video of a vehicle?", "label": 0}, {"captions": ["water pouring and bubbling", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["uyRfq-jKPpo", "vbZ-0lGPneg"], "start_seconds": ["50", "30"], "properties": ["water, bubbles, pouring", "a woman, a television program, a bird"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do 
an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["water is running from a faucet", "a woman is speaking and a dog is whimpering"], "question": "which entity is a video of a television program?", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "water pouring and bubbling"], "sample_ids": ["w9lpbUn0hPc", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["male, wind, rustling", "water, bubbles, pouring"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "water is running from a faucet"], "question": 
"which entity is a liquid", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "some men converse over an engine running"], "sample_ids": ["uYT5gxnyMWM", "sCiy7QS1U"], "start_seconds": ["50", "300"], "properties": ["a, scream, girl", "men, converse, engine"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a girl speaking followed by a scream and more girls talking?", "label": 0}, {"captions": ["a train horn sounds as a railroad passing bell rings", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["zgUgkpk78xU", "tDVADusiIoc"], "start_seconds": ["70", "60"], "properties": ["horn, bell, train", "water, radio, man"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["tapping occurs then a baby cries", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wIJK3-5y0kA", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["a, cry, baby", "airplane, boy, fly"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a baby cries and a woman speaks", "a 
woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["se87d6yxEOA", "ukg5L09Wpvo"], "start_seconds": ["10", "150"], "properties": ["run, whistle, pass", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a train blows its whistle and blows its horn "], "question": "which train whistle is continuous", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["zVacuqSb4LI", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["blares, fades, train", "airplane, boy, fly"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a train 
whistle blows and a train passes by with a whistle blowing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sapQIQUhFc", "vb1fPSDI4c"], "start_seconds": ["280", "30"], "properties": ["water, stream, trickles", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "a toilet flushes and a female speaks"], "sample_ids": ["vddP56-ogds", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["liquid, laughs, man", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a toilet flushes and a man speaks"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "a person snores loudly multiple times at a close distance"], "sample_ids": ["uiS58TNyUiw", "sSMl2vc3ek"], "start_seconds": ["430", "20"], "properties": ["audio, man, speaking", "loud, multiple, distance"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["y2bVZ7rz-5M", "w34HjHr6gAY"], "start_seconds": ["280", "30"], "properties": ["motor noise, horn, siren", "beeps, hit, woman"], "captions_pred_video": ["footage of a 
parade of fire trucks driving down the street", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "an infant crying frantically"], "sample_ids": ["uYT5gxnyMWM", "zwOBqeFTgiU"], "start_seconds": ["50", "30"], "properties": ["female, spraying, scream", "cry, infant, frantically"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of the baby crying in the car seat"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["someone snores nearby", "three men talk while wind blows and some liquid flows"], "sample_ids": ["spJCm8tD9Zo", "vJ7JPEFhyLA"], "start_seconds": ["90", "16"], "properties": ["someone snores, nearby, someone", "three men, wind, flow"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": 
["u--KhUW8l1Y", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["horn, siren, life", "a woman, a television program, a bird"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a still image?", "label": 0}, {"captions": ["a door opens and birds chirp", "pigeons vocalize and birds chirp"], "sample_ids": ["yeFvk9x0wWI", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["door, open, birds", "vocalize, bird, chirp"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "of the pigeon in the cage"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["an airplane accelerates briefly", "some liquid flows while a woman laughs and man talks"], "sample_ids": ["zjTG0gaGCUI", "vddP56-ogds"], "start_seconds": ["80", "30"], "properties": ["accelerates, airplane, briefly", "liquid, laughs, man"], "captions_pred_video": [null, null], "captions_pred_audio": ["a jet engine roars as wind blows ", "water is running and gurgling and a man is speaking"], "question": "which entity is a video of a man talking?", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["xZepNM9qcRA", "rqu8iB22IY"], "start_seconds": ["30", "5"], "properties": ["background, motor, run", "sound, repeats, laugh"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a dog barks and a man speaks while music plays "], "question": "which entity has a motor running in the background?", 
"label": 0}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a toilet flushes and a female speaks"], "sample_ids": ["xKB8O8LTs6s", "yaln9y8I7ms"], "start_seconds": ["70", "230"], "properties": ["music, gunfire, explosion", "female, flushes, toilet"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage is blurry and out of focus"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a toilet flushes and a man speaks"], "question": "which entity is more likely to be in a movie", "label": 0}, {"captions": ["a girl speaks followed by a scream and more girls talking", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["uYT5gxnyMWM", "w34HjHr6gAY"], "start_seconds": ["50", "30"], "properties": ["a, scream, girl", "beeps, hit, woman"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["an infant crying as a woman laughs", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["xhmRY9yhC7c", "wqZ135Ssz0"], "start_seconds": ["20", "60"], "properties": ["a, laugh, infant", "two men, woman, birds"], "captions_pred_video": ["of a baby crying in a baby bouncer", null], "captions_pred_audio": ["a baby cries 
and a woman speaks", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a vehicle engine runs as a siren and horn sound", "water pouring and bubbling"], "sample_ids": ["u--KhUW8l1Y", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["sound, vehicle, horn", "water, bubbles, pouring"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["wqUmIEzuNz4", "tDlysoZiA1I"], "start_seconds": ["30", "0"], "properties": ["frog, bird, vocalize", "animal, grunts, chirps"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a cat meows and rustles", "birds are chirping and a rooster is crowing "], "question": "which entity is a frog", "label": 0}, {"captions": ["a cat meows and children speak", "people speak as gunfire rings out"], "sample_ids": 
["x5cuQjOdM3E", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["cat, speak, children", "gunfire, ring, speak"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["a toilet door squeaks as it is opened", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sdXV-ylviw", "zj2R0XoFr5k"], "start_seconds": ["190", "50"], "properties": ["door, toilet, squeaks", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a moving object", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "birds chirp and objects are moved around"], "sample_ids": ["wqZ135Ssz0", "yPUYU6t3rwo"], "start_seconds": ["60", "370"], "properties": ["man, woman, squawks", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["s59PfAghdkM", "sLUnaPT5gM8"], "start_seconds": ["0", "0"], "properties": ["bird, chirp, background, horse, neigh", "loud, laughter, intermittent"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence 
in the background", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["birds vocalize and a man speaks", "wind blowing followed by a zoom"], "sample_ids": ["v0wPrLBI3hg", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["vocalize, bird, speak", "wind, blow, zoom"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a train horn sounds as it passes by", "winds blows roughly as a vehicle races past"], "sample_ids": ["ukg5L09Wpvo", "xjvTpk2Zpr8"], "start_seconds": ["150", "70"], "properties": ["sound, train, horn", "wind, blows, vehicle"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a drill drills through something then people begin laughing", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tEE3MpBt1sg", "vYkA3cfXp5Q"], "start_seconds": ["50", "30"], "properties": ["drill, something, laugh", "engine, accelerate, idle"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "an engine is idling"], "question": "which is a vehicle", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a 
child speaks in closed space"], "sample_ids": ["sNB8zxXneIM", "yW6FWLSLkx4"], "start_seconds": ["20", "40"], "properties": ["several, quack, cocks", "child, space, speak"], "captions_pred_video": ["a group of geese in a cage", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "pigeons vocalize and birds chirp"], "sample_ids": ["tIY7qOV3rEM", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "vocalize, bird, chirp"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "of the pigeon in the cage"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking and a bee is buzzing"], "question": "which animal is vocalizing", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tQWGZLItBXk", "tiDFTC-5vU"], "start_seconds": ["170", "30"], "properties": ["voice, music, whoosh", "male, duck, laugh"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "wind blowing followed by a zoom"], "sample_ids": ["xjvTpk2Zpr8", "vr8ZXjEBhMQ"], "start_seconds": ["70", "150"], "properties": ["wind, blows, vehicle", "wind, blow, zoom"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "is taken from a motorcycle's point of view"], 
"captions_pred_audio": ["a jet engine roars and wind blows ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more calm", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["tgbONvsP47Y", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["noise, truck, accelerate", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a fire truck entering a garage", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a car is driving on the road ", "a woman is speaking and a dog is whimpering"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["tiDFTC-5vU", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["male, duck, laugh", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a woman is speaking and a dog is whimpering"], "question": "which entity has a duck quacking?", "label": 0}, {"captions": ["an aircraft engine runs as wind blows heavily", "an engine runs loudly"], "sample_ids": ["xjvTpk2Zpr8", "vqZuVbG6-HI"], "start_seconds": ["70", "130"], "properties": ["engine, run, wind", "loud, engine, run"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a lawn mower is running and men are speaking "], "question": "which entity is running", "label": 1}, {"captions": ["humming and rattling of an engine idling as it revs", "a car speeding up in the distance"], "sample_ids": ["xMXvkIcaG0Y", "u0TrcHhkPQ"], "start_seconds": ["30", 
"20"], "properties": ["sound, humming, rattling", "distance, car, speed"], "captions_pred_video": ["footage of a car's hood being opened up to reveal the engine underneath the hood", null], "captions_pred_audio": ["an engine is revving and accelerating ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "an infant crying as a woman laughs"], "sample_ids": ["sZPuqDgX2V0", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["commentator, race, track", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vbr9mHKc8WM", "wz7N8YRy74I"], "start_seconds": ["40", "30"], "properties": ["noise, loudness, engine", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["an engine is idling", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is quieter", "label": 0}, {"captions": ["an engine runs and a man speaks", "some liquid flows while a woman laughs and man talks"], "sample_ids": ["yT5WfYMRr-U", "vddP56-ogds"], "start_seconds": ["30", "30"], "properties": ["engine, run, man", "liquid, laughs, man"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "water is running and gurgling and a man is speaking"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["speaking following by laughing and clapping", 
"a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["u2f5NpsoHBg", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["person, laugh, clap", "airplane, boy, fly"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "a car speeds away loudly followed by a car revving loudly and driving away while outside"], "sample_ids": ["x5cuQjOdM3E", "sjlVMgdGSK0"], "start_seconds": ["30", "30"], "properties": ["cat, talk, meow", "car, revving, loudly"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a cat meows and a woman speaks", "a car accelerates and revs its engine "], "question": "which entity is louder", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "a speedboat passes quickly on the water"], "sample_ids": ["zkKdxzNC97Y", "tjmoSi330GM"], "start_seconds": ["27", "23"], "properties": ["hard, surface, door", "speed, water, boat"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["a door is opened and closed", "a motorboat speeds through water with wind noise "], "question": "which object is moving on a hard surface", "label": 0}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a duck quacks continuously"], "sample_ids": ["zALy31PjDl0", "vh30P49Po6s"], "start_seconds": ["21", "30"], "properties": ["a man, a vehicle, 
a horn", "quacks, continuously, duck"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "a man speaks as bees buzz and birds chirp"], "sample_ids": ["smDKStoHBJo", "t25U-v4k4ts"], "start_seconds": ["0", "40"], "properties": ["a, talk, baby, cry", "bees buzz, birds chirp, man speaks"], "captions_pred_video": ["a man holding a crying baby in his arms", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking and bees are buzzing"], "question": "which entity has a baby?", "label": 0}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "some tunes played by whistling"], "sample_ids": ["xjhAnI2q6hM", "u6BnG6YZqJ4"], "start_seconds": ["6", "0"], "properties": ["wind, blow, loudly", "tune, play, whistling"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["scraping and female speech with distant music", "a cat meows and children speak"], "sample_ids": ["yHeVV-xeOxQ", "x5cuQjOdM3E"], "start_seconds": ["130", "30"], "properties": ["female, speech, music", "cat, speak, children"], "captions_pred_video": ["of a girl milking a goat's udder", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a cat meows and a woman speaks"], "question": 
"which entity is more likely to be a cat", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "wind blows as people chatter quietly"], "sample_ids": ["yZmhM1HcsyE", "xBxDz0CFVn0"], "start_seconds": ["4", "30"], "properties": ["engine, roar, water", "wind, chatter, people"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "footage is blurry and out of focus"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an airplane accelerates briefly", "pigeons vocalize and birds chirp"], "sample_ids": ["zjTG0gaGCUI", "uiS58TNyUiw"], "start_seconds": ["80", "430"], "properties": ["accelerates, airplane, briefly", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "a infant makes noise and is excited"], "sample_ids": ["slZLHwNbbt4", "wIJK3-5y0kA"], "start_seconds": ["300", "30"], "properties": ["a, horn, run", "noise, excited, infant"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vbpKkWvfOu4", "zl9Dqx-j7q4"], "start_seconds": ["560", "6"], "properties": ["a, man, speaks", "engine, laugh, loud"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "a dark barks and whimpers"], "sample_ids": ["un9VQlzgZM", "sYj4hpDUZDQ"], "start_seconds": ["5", "30"], "properties": ["females, talk, laugh", "barks, whimpers, dark"], "captions_pred_video": [null, "a brown and white dog standing in front of a wall with its mouth open"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a dog barks and a cat meows"], "question": "which entity is a dog", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a car speeding up in the distance"], "sample_ids": ["sNB8zxXneIM", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["several, quack, cocks", "distance, car, speed"], "captions_pred_video": ["a group of geese in a cage", null], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["an airplane engine spools and people speak", "a woman speaks as she rubs two objects together"], "sample_ids": ["wTjoRj1se3U", "vzxHnu-SFEw"], "start_seconds": ["390", "80"], "properties": ["airplane, engine, spool", "two objects, woman, speak"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a jet engine is running and people are talking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a machine?", "label": 0}, {"captions": ["a man talks while a clock does ticktock", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["spYNpeN7rPY", "tdWhHV3X25Q"], "start_seconds": ["1", "60"], "properties": ["a clock, ticktock, man", "applause, audience, yells"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "white noise and snoring with some rustling in the background"], "sample_ids": ["wztCSUxOf8", "xzKKf9bKNUo"], "start_seconds": ["130", "10"], "properties": ["a crowd, yells, applauds", "background, noise, snoring"], "captions_pred_video": [null, "shows a woman laying on a bed with her eyes closed and her mouth open"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a person snoring loudly"], "question": "which entity is more quiet", "label": 1}, {"captions": ["some tunes played by whistling", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["u6BnG6YZqJ4", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["tune, play, whistling", "airplane, boy, fly"], 
"captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person whistling a song", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "some men converse over an engine running"], "sample_ids": ["uPDn2BFTHk", "sCiy7QS1U"], "start_seconds": ["140", "300"], "properties": ["lady, laugh, baby", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a baby?", "label": 0}, {"captions": ["a person is burping then speaks and laughs", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["wAAkbZToh8", "y8WEcpOlT3I"], "start_seconds": ["0", "40"], "properties": ["burp, laugh, speak", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man burps and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is about a person speaking and laughing?", "label": 0}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a propeller rotates loudly and intensely"], "sample_ids": ["uRExseg-0XI", "ugHJF0hfYkg"], "start_seconds": ["210", "10"], "properties": ["woman, man, water", "loud, intense, propeller"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a helicopter is flying overhead "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an electric 
engine works nearby followed by a child talking", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["xSKJGCItUWE", "w5W5Kqtc8E"], "start_seconds": ["10", "100"], "properties": ["engine, work, child", "wind, blow, vehicle"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine?", "label": 1}, {"captions": ["an infant crying as a woman laughs", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["xhmRY9yhC7c", "rqu8iB22IY"], "start_seconds": ["20", "5"], "properties": ["a, laugh, infant", "sound, repeats, laugh"], "captions_pred_video": ["of a baby crying in a baby bouncer", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a dog barks and a man speaks while music plays "], "question": "which entity has a woman laugh?", "label": 0}, {"captions": ["a large crowd cheers and applauds", "a man speaks as a machine runs"], "sample_ids": ["rqfQRErjfk8", "vD6lYD1l0BY"], "start_seconds": ["170", "330"], "properties": ["crowd, cheers, applauds", "a, machine, run"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "game controller being held in the hands of the person"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a man is speaking and dishes are being washed "], "question": "which entity is a machine", "label": 1}, {"captions": ["water splashes and a door squeaks", "a man speaks over intermittent keyboard taps"], "sample_ids": ["sdXV-ylviw", "tw76HGONaKg"], "start_seconds": ["190", "570"], "properties": ["sound, splash, door", "audio, man, keyboard"], "captions_pred_video": [null, "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time 
guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a man speaks and types on a computer keyboard "], "question": "which entity has a door?", "label": 0}, {"captions": ["a guy sneezes followed by another guy speaking and whistling and then a pigeon coos", "birds chirp and animals vocalize"], "sample_ids": ["vhIcOufIwo4", "sxIvBMSavMQ"], "start_seconds": ["30", "210"], "properties": ["sneeze, speaking, pigeon", "vocalize, chirp, animal"], "captions_pred_video": ["footage of a pigeon in a cage", "beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a"], "captions_pred_audio": ["a man is speaking and pigeons are cooing", "birds are chirping and insects are buzzing"], 
"question": "which entity is about animals vocalizing?", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "an airplane engine spools and people speak"], "sample_ids": ["s3cTDAj31g", "wTjoRj1se3U"], "start_seconds": ["80", "390"], "properties": ["man, talk, woman", "airplane, engine, spool"], "captions_pred_video": [null, "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a jet engine is running and people are talking"], "question": "which entity is a video of a man talking and a woman shouting?", "label": 0}, {"captions": ["a male speaks over some small clicks", "a bird is chirping and tweeting a bird song"], "sample_ids": ["uXxVebHsGZ8", "wPz6QRAkEb4"], "start_seconds": ["30", "60"], "properties": ["male, clicks, speak", "chirps, tweets, song"], "captions_pred_video": [null, "a bird in a cage on top of a pole"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "birds are chirping in the background "], "question": "which entity is a bird", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "dishes cling together then a man begins to speak"], "sample_ids": ["y8WEcpOlT3I", "sQGXqGcwOTc"], "start_seconds": ["40", "3"], "properties": ["wind, speak, buffeting", "cling, speak, dishes"], "captions_pred_video": ["on how to use a sewing machine youtube", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "mechanisms are operating and water is splashing "], "question": "which entity is about speaking?", "label": 0}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a person snores loudly multiple times at a close distance"], "sample_ids": ["uEU-Hg5MTN8", "sSMl2vc3ek"], "start_seconds": ["27", "20"], "properties": ["a woman, laughs, animal", "loud, multiple, distance"], "captions_pred_video": ["of a girl wearing a pig mask and a 
boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a kid speaks followed by music playing", "an insect buzzes around continuously"], "sample_ids": ["tQWGZLItBXk", "v25l1jef3JY"], "start_seconds": ["170", "0"], "properties": ["music, kid, speak", "buzzes, continuously, insect"], "captions_pred_video": ["worms revolution screenshots", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a man speaks as a car is passing by"], "sample_ids": ["sZPuqDgX2V0", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["commentator, race, track", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking with background noise and breathing sounds "], "question": "which is not a race", "label": 1}, {"captions": ["someone is snoring while sleeping", "a person snores loudly multiple times at a close distance"], "sample_ids": ["ujMt0-D-x2k", "sSMl2vc3ek"], "start_seconds": ["0", "20"], "properties": ["snore, sleep, someone", "loud, multiple, distance"], "captions_pred_video": ["of the dog playing with a toy on the floor", null], 
"captions_pred_audio": ["a person is snoring loudly", "a person snoring loudly"], "question": "which entity is snoring while sleeping", "label": 0}, {"captions": ["an airplane engine roars increasingly louder", "a clock ticktocks"], "sample_ids": ["vBslzh7saPw", "v-g-j2uTByM"], "start_seconds": ["90", "30"], "properties": ["engine, roar, louder", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "some tunes played by whistling"], "sample_ids": ["vs65y4qmyBE", "u6BnG6YZqJ4"], "start_seconds": ["340", "0"], "properties": ["engine, run, man", "tune, play, whistling"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "a car speeding up in the distance"], "sample_ids": ["vz8868znkVQ", "u0TrcHhkPQ"], "start_seconds": ["60", "20"], "properties": ["audio, click, kid speaking", "distance, car, speed"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", null], "captions_pred_audio": ["a baby is laughing and breathing with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a man speaks as a motor runs in the background"], "sample_ids": ["tDlysoZiA1I", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["animal, grunts, chirps", "background, motor, run"], 
"captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "vehicles pass by on a roadway"], "sample_ids": ["wvKpEYswXO0", "tgbONvsP47Y"], "start_seconds": ["150", "0"], "properties": ["water, tap, run", "pass, vehicle, roadway"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sSMl2vc3ek", "yajyRTUQk3U"], "start_seconds": ["20", "400"], "properties": ["a person, laughs, snores", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "dogs bark as an engine runs and a person whistles"], "sample_ids": ["xfudFO976zE", "zY3icUyMdh8"], "start_seconds": ["0", "20"], "properties": ["animal, bleats, cry", "dog, bark, engine"], "captions_pred_video": ["footage is blurry and shaky", "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a car is driving and dogs are barking and squealing "], "question": "which entity is more active", "label": 
1}, {"captions": ["a dog barks and whimpers", "a car speeding up in the distance"], "sample_ids": ["sShpyu2l4YQ", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["barks, whimpers, dog", "distance, car, speed"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "a piece of wood is being placed down and sawed"], "sample_ids": ["tQWGZLItBXk", "uiItxDsDMFI"], "start_seconds": ["170", "30"], "properties": ["music, person, ding", "wood, piece, saw"], "captions_pred_video": ["worms revolution screenshots", "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a saw is being used with background noise "], "question": "which entity is about a person speaking?", "label": 0}, {"captions": ["a small engine spits as it runs", "small dogs yip and bark sharply"], "sample_ids": ["sZvwOuuPGP0", "v-wcQf4BDY0"], "start_seconds": ["50", "120"], "properties": ["spits, engine, runs", "bark, yip, sharply"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a medium engine is running ", "a dog barks and growls"], "question": "which entity is more likely to be a person", "label": 1}, {"captions": ["a person is snoring while sleeping", "people speak as gunfire rings out"], "sample_ids": ["vJrjSeP17yE", "wqTCwqVRDlk"], "start_seconds": ["40", "80"], "properties": ["a person is sleeping, snoring, person", "gunfire, ring, speak"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of a woman shooting a gun at a target on the 
beach"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and a gun is fired"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a motorcycle idles loudly as wind blows", "water splashes as an animal walks through"], "sample_ids": ["v7jJS8aAyA", "w1ir-sZ3Im8"], "start_seconds": ["10", "90"], "properties": ["wind, blows, loudly", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "water splashes and gurgles as people speak"], "question": "which entity is more quiet", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "water flows and trickles"], "sample_ids": ["vqZuVbG6-HI", "tB7hWb9gTuQ"], "start_seconds": ["130", "30"], "properties": ["background, male, female", "water, flow, trickle"], "captions_pred_video": ["footage is blurry because it's raining outside", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "water is splashing and gurgling"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a train horn blows as it passes by"], "sample_ids": ["sSMl2vc3ek", "zVacuqSb4LI"], "start_seconds": ["20", "30"], "properties": ["a person, laughs, snores", "horn, blows, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a person snoring loudly", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "motors rev and run loudly as a person laughs"], "sample_ids": ["vqZuVbG6-HI", "zl9Dqx-j7q4"], "start_seconds": ["130", "6"], "properties": ["background, male, female", "motors rev, laugh, loudly"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "wind blowing followed by a zoom"], "sample_ids": ["zl9Dqx-j7q4", "vr8ZXjEBhMQ"], "start_seconds": ["6", "150"], "properties": ["engine, laugh, loud", "wind, blow, zoom"], "captions_pred_video": ["footage of a man driving a car in the dark", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a jet engine roars ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["an emergency siren wails as it passes", "a woman speaks as she rubs two objects together"], "sample_ids": ["vGj1XLJvNrw", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["wails, wails, pass", "two objects, woman, speak"], "captions_pred_video": ["footage of a police car driving down a city street", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is silent", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "small dogs yip and bark sharply"], "sample_ids": ["tDlysoZiA1I", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["animal, grunts, chirps", "bark, yip, sharply"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "a car accelerates and wind blows"], "sample_ids": ["tPJvjq9QePY", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["animal, bleat, moo", "accelerates, wind, blows"], "captions_pred_video": ["a dog and a sheep in a barn", null], "captions_pred_audio": ["a baby cries and a man speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, 
{"captions": ["a woman speaks with water running", "several insects fly while two men talk"], "sample_ids": ["wTideSjRFS0", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["water, running, woman", "several, fly, men"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more moving parts", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "birds chirp and objects are moved around"], "sample_ids": ["sapQIQUhFc", "yPUYU6t3rwo"], "start_seconds": ["280", "370"], "properties": ["water, trickles, flow", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as a machine runs", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vD6lYD1l0BY", "su6FAOcOA8c"], "start_seconds": ["330", "4"], "properties": ["a, machine, run", "engine, idle, woman"], "captions_pred_video": ["game controller being held in the hands of the person", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "a clock ticktocks"], "sample_ids": ["uKCSGgof8gI", "v-g-j2uTByM"], "start_seconds": ["12", "30"], "properties": ["chirps, distance, signal", "ticktocks, clock, ticktocks"], "captions_pred_video": 
["footage of a street in a small town on a sunny day", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vuUVPzd2FXw", "tdWhHV3X25Q"], "start_seconds": ["160", "60"], "properties": ["a, steam, release", "applause, audience, yells"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["zofjfKhqLk8", "su6FAOcOA8c"], "start_seconds": ["10", "4"], "properties": ["background, metal, clings", "engine, idle, woman"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "water splashes as an animal walks through"], "sample_ids": ["voJh2gJxXhA", "w1ir-sZ3Im8"], "start_seconds": ["50", "90"], "properties": ["music, frog, croak", "animal, water, splashes"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["music is playing and crickets are chirping ", 
"water splashes and gurgles as people speak"], "question": "which entity is about a frog?", "label": 0}, {"captions": ["a drill runs and two people laugh", "a man speaks as a motor runs in the background"], "sample_ids": ["tEE3MpBt1sg", "xZepNM9qcRA"], "start_seconds": ["50", "30"], "properties": ["two people, laugh, drill", "background, motor, run"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["people clap and speak in the distance", "a speedboat passes quickly on the water"], "sample_ids": ["wwyfGO2J4", "tjmoSi330GM"], "start_seconds": ["90", "23"], "properties": ["clap, distance, speak", "speed, water, boat"], "captions_pred_video": [null, "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a motorboat speeds through water with wind noise "], "question": "which is moving faster", "label": 0}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["siJFXfGWgDk", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["a, bird, vehicle", "a woman, something, fried"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["multiple birds vocalize and wind blows", "vehicles pass by on a roadway"], "sample_ids": ["uoGVs9yUqY4", "tgbONvsP47Y"], 
"start_seconds": ["30", "0"], "properties": ["multiple, vocalize, wind", "pass, vehicle, roadway"], "captions_pred_video": ["for how to make a wooden shed door youtube", "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["tQWGZLItBXk", "tDVADusiIoc"], "start_seconds": ["170", "60"], "properties": ["music, person, ding", "water, radio, man"], "captions_pred_video": ["worms revolution screenshots", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "a vehicles accelerate quickly and someone laughs"], "sample_ids": ["vBslzh7saPw", "uWPRNLnpy7Y"], "start_seconds": ["90", "10"], "properties": ["power, scream, increase", "accelerate, laugh, vehicle"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "is taken from a car driving down the street"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["xjvTpk2Zpr8", "uZesmtKZGSw"], "start_seconds": ["70", "250"], "properties": ["wind, blows, vehicle", "men, talk, cars"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "1970 chevrolet holden monaro gts 1970 chevrolet 
holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more likely to be a movie", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a man speaks as a machine runs"], "sample_ids": ["wfHeoPDLMaM", "vD6lYD1l0BY"], "start_seconds": ["30", "330"], "properties": ["quacking, squawking, ducks", "a, machine, run"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "game controller being held in the hands of the person"], "captions_pred_audio": ["ducks are quacking", "a man is speaking and dishes are being washed "], "question": "which entity is not a 
person?", "label": 0}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a vehicle engine accelerating then running on idle"], "sample_ids": ["rwtmaKiCcQU", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["nozzle, depressed, spray can", "engine, accelerate, idle"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["spraying and people speaking", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["birds fly and flutter around", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wGKgwOP3h30", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["fly, flutter, around", "engine, accelerate, idle"], "captions_pred_video": ["of the pigeons in the coop", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["pigeons coo and flap their wings", "an engine is idling"], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["electronic beeps occur in a short series", "water running down a sink while a man is talking"], "sample_ids": ["y682ml90jGw", "vSeGhaZt-aI"], "start_seconds": ["11", "50"], "properties": ["beeps, series, electronic", "water, sink, talk"], "captions_pred_video": [null, "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a beeping sound is being made ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a video", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "a man speaks followed by another man speaking outside"], "sample_ids": ["wwyfGO2J4", "viuTg1M-dqg"], "start_seconds": ["90", "30"], "properties": ["people, applaud, hoot", "two men, speak, follow"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["people are clapping and speaking 
with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two men speaking?", "label": 1}, {"captions": ["a baby cries and a woman moans", "an insect buzzes around continuously"], "sample_ids": ["smDKStoHBJo", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["a, cry, woman", "buzzes, continuously, insect"], "captions_pred_video": ["a man holding a crying baby in his arms", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["skd2PphS6oI", "tDlysoZiA1I"], "start_seconds": ["190", "0"], "properties": ["ring, bird, vocalize", "animal, grunts, chirps"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "birds are chirping and a rooster is crowing "], "question": "which entity has a bird vocalize?", "label": 0}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "someone is typing on a computer keyboard"], "sample_ids": ["ylpYOorfH4o", "v0x1odnXtP0"], "start_seconds": ["410", "210"], "properties": ["engine, run, loud", "keyboard, type, computer"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 
dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vfYTJq7nU", "yajyRTUQk3U"], "start_seconds": ["130", "400"], "properties": ["ducks, quack, man", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a toilet door squeaks as it is opened"], "sample_ids": ["sShpyu2l4YQ", "sdXV-ylviw"], "start_seconds": ["0", "190"], "properties": ["growl, bark, yip", "door, toilet, squeaks"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a dog barks and taps with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sU53zg9Jp7s", "uZesmtKZGSw"], "start_seconds": ["380", "250"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "men, talk, cars"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["several ducks quack and cocks crow far away", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["sNB8zxXneIM", "tDVADusiIoc"], "start_seconds": ["20", "60"], "properties": ["several, quack, cocks", "water, radio, man"], "captions_pred_video": ["a group of geese in a cage", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water?", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "tapping occurs then a baby cries"], "sample_ids": ["x5cuQjOdM3E", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["cat, meows, young woman", "a, cry, baby"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a cat meows and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity is crying", "label": 1}, {"captions": ["several insects fly while two men talk", "some men converse over an engine running"], "sample_ids": ["s-T9OVOiMLo", "sCiy7QS1U"], "start_seconds": ["330", "300"], "properties": ["several, fly, men", "men, converse, engine"], "captions_pred_video": ["a man climbing up a tree using a rope to reach 
the top of the tree", null], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has more people", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wqZ135Ssz0", "sLUnaPT5gM8"], "start_seconds": ["60", "0"], "properties": ["two men, woman, birds", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["two women and a man talk while a kid cries", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wyllXV6PjKo", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["a kid, talk, cry", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman is speaking and a baby is crying"], "question": "which entity has a kid?", "label": 0}, {"captions": ["a clock ticktocks briefly", "a person is burping then speaks and laughs"], "sample_ids": ["u7C-AEBQM", "wAAkbZToh8"], "start_seconds": ["30", "0"], "properties": ["ticktocks, clock, ticktocks briefly", "burp, laugh, speak"], "captions_pred_video": [null, null], "captions_pred_audio": ["a ticktock of a clock", "a man burps and a woman speaks"], "question": "which entity is speaking?", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "several insects fly while two men talk"], "sample_ids": ["yajyRTUQk3U", "s-T9OVOiMLo"], "start_seconds": ["400", "330"], "properties": ["a woman, something, fried", "several, fly, 
men"], "captions_pred_video": ["- a woman cooking in the kitchen", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a woman talking?", "label": 0}, {"captions": ["an engine starts and increases in power", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["zjTG0gaGCUI", "zFjIWfSD-4"], "start_seconds": ["80", "410"], "properties": ["power, increase, engine", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a jet engine roars as wind blows ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a motor?", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["xvDdE3zNf8Y", "wDVMhEdTiVw"], "start_seconds": ["120", "30"], "properties": ["a, female, speaks", "gun, shoot, water"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman speaks and crumples paper", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more violent", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "vehicles pass by on a roadway"], "sample_ids": ["wqZ135Ssz0", "tgbONvsP47Y"], "start_seconds": ["60", "0"], "properties": ["two men, woman, birds", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a car is driving on the road "], "question": "which entity is more active", "label": 1}, {"captions": ["a helicopter engine runs continuously", "vehicles 
pass by on a roadway"], "sample_ids": ["ugHJF0hfYkg", "tgbONvsP47Y"], "start_seconds": ["10", "0"], "properties": ["engine, running, continuously", "pass, vehicle, roadway"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["continuous sneezing together with speech", "wind blowing followed by a zoom"], "sample_ids": ["x4dZyf9Gbj0", "vr8ZXjEBhMQ"], "start_seconds": ["130", "150"], "properties": ["continuous, sneeze, speech", "wind, blow, zoom"], "captions_pred_video": ["footage is blurry and out of focus", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman sneezes and speaks", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a door opens and closes", "several insects fly while two men talk"], "sample_ids": ["vBHyYJ8pL0", "s-T9OVOiMLo"], "start_seconds": ["2", "330"], "properties": ["open, close, door", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more likely to be in a museum", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vMf1dLD6Sng", "sLUnaPT5gM8"], "start_seconds": ["6", "0"], "properties": ["frog, bird, vocalize", "loud, laughter, intermittent"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a frog croaks loudly", "a baby is laughing and breathing while a man is 
speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a female speaks softly as paper crinkles", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["xvDdE3zNf8Y", "tiDFTC-5vU"], "start_seconds": ["120", "30"], "properties": ["a, female, speaks", "male, duck, laugh"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", null], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking and ducks are quacking"], "question": "which entity is more likely to be a person speaking?", "label": 0}, {"captions": ["frogs croak and vocalize", "wind blowing followed by a zoom"], "sample_ids": ["yswmmRZFItk", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["croak, vocalize, frog", "wind, blow, zoom"], "captions_pred_video": ["a close up of a frog in the water", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a frog is croaking", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "waves crash against a shoreline and people speak"], "sample_ids": ["w2JXXIAdUdg", "yFB25fqfU8I"], "start_seconds": ["10", "300"], "properties": ["snoring, distance, person", "wave, crash, shoreline"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be a recording", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "water is sprayed across a hard surface"], "sample_ids": ["sncRqQ67iJU", "sQwlkXjQabo"], "start_seconds": ["460", "10"], "properties": ["loud, repeatedly, man", "water, spray, surface"], "captions_pred_video": ["of an 
airplane flying in the dark sky at night", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a person is snoring", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vSeGhaZt-aI", "zl9Dqx-j7q4"], "start_seconds": ["50", "6"], "properties": ["water, sink, talk", "engine, laugh, loud"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "vehicles pass by on a roadway"], "sample_ids": ["ujMt0-D-x2k", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["snoring, rhythmical, nearby", "pass, vehicle, roadway"], "captions_pred_video": ["of the dog playing with a toy on the floor", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a person is snoring loudly", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "paper is crumpling consistently"], "sample_ids": ["vBslzh7saPw", "v5cSxLaHADY"], "start_seconds": ["90", "0"], "properties": ["power, scream, increase", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a jet engine roars and accelerates ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a person is burping while a girl speaks"], "sample_ids": 
["wz7N8YRy74I", "vdoxuJn9lTc"], "start_seconds": ["30", "40"], "properties": ["rooster, crow, background, people", "person, burp, girl"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a child speaks followed by a burp"], "question": "which entity is a person speaking to a rooster?", "label": 0}, {"captions": ["a flush is followed by gurgling water, then another flush", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["tqR406bGiE", "wz7N8YRy74I"], "start_seconds": ["40", "30"], "properties": ["flush, water, gurgle", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a video of a toilet flushing?", "label": 0}, {"captions": ["birds tweet and squawk", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["w1mlz3Pe4fU", "xfaoyyzw2WU"], "start_seconds": ["300", "180"], "properties": ["squawk, tweet, scream", "loud, jet engine, roar"], "captions_pred_video": ["of a bird in a cage", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["birds are chirping and singing", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a man speaks as a car is passing by"], "sample_ids": ["uYT5gxnyMWM", "sK4u5T8hW78"], "start_seconds": ["50", "30"], "properties": ["a, scream, girl", "a, car, pass"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "a woman speaks as she rubs two objects together"], "sample_ids": ["vBslzh7saPw", "vzxHnu-SFEw"], "start_seconds": ["90", "80"], "properties": ["engine, roar, louder", "two objects, woman, speak"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is silent", "label": 1}, {"captions": ["an infant crying and a woman speaking with some 
distant murmuring", "a car speeding up in the distance"], "sample_ids": ["smDKStoHBJo", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["a, infant, speaking", "distance, car, speed"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["uWPRNLnpy7Y", "zj2R0XoFr5k"], "start_seconds": ["10", "50"], "properties": ["accelerate, laugh, vehicle", "airplane, boy, fly"], "captions_pred_video": ["is taken from a car driving down the street", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "dogs bark as an engine runs and a person whistles"], "sample_ids": ["vcmWSmvti8", "zY3icUyMdh8"], "start_seconds": ["30", "20"], "properties": ["music, man, fire", "dog, bark, engine"], "captions_pred_video": [null, "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a car is driving and dogs are barking and squealing "], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "a person snores loudly multiple times at a close distance"], "sample_ids": ["w5W5Kqtc8E", "sSMl2vc3ek"], "start_seconds": ["100", "20"], "properties": ["wind, blow, vehicle", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are 
shouting and cheering ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["electronic beeps occur in a short series", "waves crash against a shoreline and people speak"], "sample_ids": ["y682ml90jGw", "yFB25fqfU8I"], "start_seconds": ["11", "300"], "properties": ["beeps, series, electronic", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a beeping sound is being made ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["birds chirp and wind blows", "waves crash against a shoreline and people speak"], "sample_ids": ["sxIvBMSavMQ", "yFB25fqfU8I"], "start_seconds": ["210", "300"], "properties": ["birds, chirp, wind", "wave, crash, shoreline"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "footage of a person surfing in the ocean"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "a sheep baa followed by birds chirping and then more 
sheep baaing"], "sample_ids": ["slZLHwNbbt4", "vlS6YMeWAPo"], "start_seconds": ["300", "40"], "properties": ["clap, distance, horn", "sheep, baa, birds"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "soft movement is accompanied by clocks ticking in the background"], "sample_ids": ["uEU-Hg5MTN8", "vlJS7LN2XyM"], "start_seconds": ["27", "30"], "properties": ["animal, grunts, snorts", "background, clocks, ticking"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a ticktock of a clock"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "a man speaks as a car is passing by"], "sample_ids": ["v5P-ThUCINM", "sK4u5T8hW78"], "start_seconds": ["400", "30"], "properties": ["background, chirp, bird", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a man is speaking with background noise and breathing sounds "], "question": "which 
entity has a car passing by?", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "an infant crying as a woman laughs"], "sample_ids": ["wjsXBsc7M40", "xhmRY9yhC7c"], "start_seconds": ["10", "20"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "a, laugh, infant"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity is a video of a baby laughing?", "label": 0}, {"captions": ["a toilet flushes and water sputters as it drains", "a woman speaks as she rubs two objects together"], "sample_ids": ["smGI3C1NZc", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["water, drain, toilet", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a process", "label": 1}, {"captions": ["a man talks nearby 
and another man talks far away while some liquid flows", "a man speaks as a motor runs in the background"], "sample_ids": ["sapQIQUhFc", "xZepNM9qcRA"], "start_seconds": ["280", "30"], "properties": ["liquid, flow, distance", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a child yells and another yells", "an engine runs loudly"], "sample_ids": ["vMDHu7Lxcgw", "vqZuVbG6-HI"], "start_seconds": ["410", "130"], "properties": ["two, yell, child", "loud, engine, run"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sTpirNYo8vQ", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["a, tone, fast", "a, scream, girl"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a man speaks as a motor runs in the background"], "sample_ids": ["sfAvvZwdLCY", "xZepNM9qcRA"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "background, motor, run"], "captions_pred_video": ["footage of the toilet in the bathroom", "a close-up view of the motorcycle's engine and exhaust 
system"], "captions_pred_audio": ["a toilet is flushed", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["tapping occurs then a baby cries", "people speak as gunfire rings out"], "sample_ids": ["wIJK3-5y0kA", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["a, cry, baby", "gunfire, ring, speak"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a man talks while vehicles pass by", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sK4u5T8hW78", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["a, man, talk", "People, motor, brakes"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has more people", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "a frog vocalizes while birds chirp"], "sample_ids": ["wTideSjRFS0", "vMf1dLD6Sng"], "start_seconds": ["30", "6"], "properties": ["food, sizzle, woman", "frog, bird, vocalize"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a frog in a pond with pink flowers in the 
background"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a frog croaks loudly"], "question": "which entity is a frog?", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "a propeller rotates loudly and intensely"], "sample_ids": ["xzKKf9bKNUo", "ugHJF0hfYkg"], "start_seconds": ["10", "10"], "properties": ["background, noise, snoring", "loud, intense, propeller"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a person snoring loudly", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wnpJndXuxLc", "tDVADusiIoc"], "start_seconds": ["50", "60"], "properties": ["blows, vehicle, train", "water, radio, man"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a train?", "label": 0}, {"captions": ["some clanking with distant murmuring", "a man speaks with another voice speaking in the background"], "sample_ids": ["uMTTDZ2mb4", "u21-Z5gJCB8"], "start_seconds": ["30", "30"], "properties": ["clanking, murmuring, distant", "background, voice, man"], "captions_pred_video": [null, "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking with another voice speaking in 
the background?", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["x5cuQjOdM3E", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["cat, meows, young woman", "rooster, crow, background, men"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is more domesticated", "label": 0}, {"captions": ["a motor runs and stops, and animals squawk and croak", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["s4tUs779vBA", "w34HjHr6gAY"], "start_seconds": ["160", "30"], "properties": ["a, sound, stop", "beeps, hit, woman"], "captions_pred_video": ["game in 10 words or less - screenshot 1", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a car is revving and a man is speaking ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "a baby cries and a woman moans"], "sample_ids": ["spJCm8tD9Zo", "smDKStoHBJo"], "start_seconds": ["90", "0"], "properties": ["snores, wheezes, sleeps", "a, cry, woman"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a man holding a crying baby in his arms"], 
"captions_pred_audio": ["a person is snoring loudly", "a baby is crying and a woman is speaking"], "question": "which entity is a person", "label": 0}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a child speaks in closed space"], "sample_ids": ["ziUT9IFTkjg", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["background, birds, rustling", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "people cheer as a vehicle engine revs"], "sample_ids": ["sfAvvZwdLCY", "xjhAnI2q6hM"], "start_seconds": ["20", "6"], "properties": ["flushes, drains, water", "engine revs, vehicle, people"], "captions_pred_video": ["footage of the toilet in the bathroom", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a toilet is flushed", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a clang followed by a toilet flushing", "paper is crumpling consistently"], "sample_ids": ["wNZ5thZM7XU", "v5cSxLaHADY"], "start_seconds": ["20", "0"], "properties": ["sound, flush, toilet", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a toilet in a bathroom stall", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a toilet flushes", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "roadway noise occurs and a truck accelerates"], "sample_ids": 
["vqZuVbG6-HI", "tgbONvsP47Y"], "start_seconds": ["130", "0"], "properties": ["background, male, female", "noise, truck, accelerate"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a car is driving on the road "], "question": "which entity is more quiet", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a man speaks as a motor runs in the background"], "sample_ids": ["sWZzXuWYY", "xZepNM9qcRA"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a telephone rings followed by a woman talking"], "sample_ids": ["wyllXV6PjKo", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["a kid, talk, cry", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman speaks and a baby cries", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a motorcycle engine works nearby", "an insect buzzes around continuously"], "sample_ids": ["tOSWIURC-4", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["engine, work, nearby", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a lawn mower is running ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["an airplane engine runs", "a male is speaking and a duck quacks as 
others laugh"], "sample_ids": ["yVPZ2MNWpms", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["engine, airplane, runs", "male, duck, laugh"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", null], "captions_pred_audio": ["a car is driving by on the road ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a clock ticktocks"], "sample_ids": ["y2bVZ7rz-5M", "v-g-j2uTByM"], "start_seconds": ["280", "30"], "properties": ["motor noise, horn, siren", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "paper is crumpling consistently"], "sample_ids": ["sd7xVssqlw", "v5cSxLaHADY"], "start_seconds": ["50", "0"], "properties": ["accelerates, tires, squealing", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a car accelerates and revs its engine ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a child yells and another yells", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vMDHu7Lxcgw", "tdWhHV3X25Q"], "start_seconds": ["410", "60"], "properties": ["two, yell, child", "applause, audience, yells"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a man is speaking and a crowd is 
clapping"], "question": "which entity has more people", "label": 1}, {"captions": ["vehicles pass by on a roadway", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["tgbONvsP47Y", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["pass, vehicle, roadway", "engine, idle, woman"], "captions_pred_video": ["footage of a fire truck entering a garage", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a car is driving on the road ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "water running down a sink while a man is talking"], "sample_ids": ["sZPuqDgX2V0", "vSeGhaZt-aI"], "start_seconds": ["30", "50"], "properties": ["commentator, race, track", "water, sink, talk"], "captions_pred_video": [null, "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a video of a race?", "label": 0}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a person is snoring while sleeping"], "sample_ids": ["uzQnlJXBbOM", "vJrjSeP17yE"], "start_seconds": ["50", "40"], "properties": ["ringing, beep, stop", "a person is sleeping, snoring, person"], "captions_pred_video": ["footage of a person using a cell phone on a table", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a telephone rings and a man speaks", "a person snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "females talk and laugh over gusting wind"], "sample_ids": ["tEE3MpBt1sg", "un9VQlzgZM"], "start_seconds": ["50", "5"], "properties": ["drill, something, laugh", 
"females, talk, laugh"], "captions_pred_video": ["footage is blurry due to the smoke in the air", null], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is about a drill?", "label": 0}, {"captions": ["water flows followed by women screaming", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["w5W5Kqtc8E", "vbZ-0lGPneg"], "start_seconds": ["100", "30"], "properties": ["water, flow, women", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking and a dog is whimpering"], "question": "which entity is more calm", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "people speak as gunfire rings out"], "sample_ids": ["tDVADusiIoc", "wqTCwqVRDlk"], "start_seconds": ["60", "80"], "properties": ["wind, radio, waves", "gunfire, ring, speak"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "wind blows as people chatter quietly"], "sample_ids": ["vXlk0lIQBFo", "xBxDz0CFVn0"], "start_seconds": ["470", "30"], "properties": ["wind, speak, vocalize", "wind, chatter, people"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage is blurry and out of focus"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a man is speaking with wind 
noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a stream of water runs briefly"], "sample_ids": ["yLy-WycbVVE", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["background, people, talk", "stream, water, run"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["yeFvk9x0wWI", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["clack, bird, chirp", "gun, shoot, water"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "water splashes as an animal walks through"], "sample_ids": ["zliInBdC98Y", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["a, baby, cries, wails", "animal, water, splashes"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a baby cries and a woman speaks", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["an audience gives applause", "a man speaks as crickets sing"], "sample_ids": ["x6iCUDmRpKQ", "ryFDPxgDOGc"], 
"start_seconds": ["38", "570"], "properties": ["applause, audience, give", "a, crickets, sing"], "captions_pred_video": ["a black background with the moon and stars in the sky", "a group of people dressed in camouflage and hunting gear in the dark"], "captions_pred_audio": ["a group of people are clapping and cheering", "a man is speaking with crickets chirping in the background"], "question": "which is not a type of music", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["uWAAAL4CIoc", "tDlysoZiA1I"], "start_seconds": ["0", "0"], "properties": ["a woman, chirps, animal", "animal, grunts, chirps"], "captions_pred_video": [null, "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "birds are chirping and a rooster is crowing "], "question": "which entity has a woman speaking happily and an animal chirps?", "label": 0}, {"captions": ["people speak as gunfire rings out", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wqTCwqVRDlk", "xBxDz0CFVn0"], "start_seconds": ["80", "30"], "properties": ["gunfire, ring, speak", "stream, water, flow"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["a person speaks briefly", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["zOZleIRqZm4", "w34HjHr6gAY"], "start_seconds": ["80", "30"], "properties": ["person, talk, brief", "beeps, hit, woman"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the 
wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a beep sounds followed by a child speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "people cheer as a vehicle engine revs"], "sample_ids": ["sQwlkXjQabo", "xjhAnI2q6hM"], "start_seconds": ["10", "6"], "properties": ["liquid, surface, spray", "engine revs, vehicle, people"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["spraying followed by silence", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["someone is burping continuously", "a man speaks as a car is passing by"], "sample_ids": ["y636gklDioE", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["burps, burps, burps", "a, car, pass"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person burps loudly several times", "a man is speaking with background noise and breathing sounds "], 
"question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["continuous sneezing together with speech", "paper is crumpling consistently"], "sample_ids": ["x4dZyf9Gbj0", "v5cSxLaHADY"], "start_seconds": ["130", "0"], "properties": ["continuous, sneeze, speech", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage is blurry and out of focus", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman sneezes and speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling consistently", "label": 1}, {"captions": ["food is frying then a woman speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["ukxt9I7eMMg", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["food, woman, speak", "multiple, people, yell"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a clock ticktocks"], "sample_ids": ["uZesmtKZGSw", "v-g-j2uTByM"], "start_seconds": ["250", "30"], "properties": ["men, talk, cars", "ticktocks, clock, ticktocks"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "in your own 
words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "an airplane engine roars increasingly louder"], "sample_ids": ["wSVhSdj0F0", "vBslzh7saPw"], "start_seconds": ["10", "90"], "properties": ["beep, clang, footsteps", "engine, roar, louder"], "captions_pred_video": [null, "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a jet engine roars and accelerates "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "someone whistles a tune"], "sample_ids": ["w5W5Kqtc8E", "sIXTftIuUgw"], "start_seconds": ["100", "90"], "properties": ["wind, blow, vehicle", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a man speaks as a car is passing by"], "sample_ids": ["yRx9txMcBl0", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["motors, tires, screech", "a, car, pass"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods 
cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a clock ticktocks continuously", "paper is crumpling consistently"], "sample_ids": ["vlJS7LN2XyM", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["ticktocks, clock, ticktocks continuously", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a ticktock of a clock", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["tgbONvsP47Y", "sLUnaPT5gM8"], "start_seconds": ["0", "0"], "properties": ["noise, truck, accelerate", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a fire truck entering a garage", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a car is driving on the road ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks while turning a water faucet on", "some men converse over an engine 
running"], "sample_ids": ["vf9xf3vMsGM", "sCiy7QS1U"], "start_seconds": ["540", "300"], "properties": ["A man speaks while turning a water faucet on.", "men, converse, engine"], "captions_pred_video": ["of the person washing their hands under the faucet", null], "captions_pred_audio": ["a man is speaking while water is running in the background", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "wind blows and people scream while an engine revs"], "sample_ids": ["s4Uz1Ffgo04", "w5W5Kqtc8E"], "start_seconds": ["100", "100"], "properties": ["water, rushes, motorcycle", "wind, engine, scream"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "waves crash against a shoreline and people speak"], "sample_ids": ["w5W5Kqtc8E", "yFB25fqfU8I"], "start_seconds": ["100", "300"], "properties": ["wind, engine, scream", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more calm", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "a telephone rings followed by a woman talking"], "sample_ids": ["xOZfdgAgJ9o", "tGcFnX0GHI"], "start_seconds": ["40", "0"], "properties": ["woman, whimpering, speaking", "ring, talk, woman"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", null], 
"captions_pred_audio": ["a woman is speaking and a baby is crying", "a dial tone sounds followed by a woman speaking"], "question": "which woman is speaking", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wTideSjRFS0", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["food, sizzle, woman", "applause, audience, yells"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a woman sneezes then speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["x4dZyf9Gbj0", "sSMl2vc3ek"], "start_seconds": ["130", "20"], "properties": ["sneezes, speaks, woman", "loud, multiple, distance"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a woman sneezes and speaks", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["someone snores nearby", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["spJCm8tD9Zo", "vbZ-0lGPneg"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "a woman, a television program, a bird"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "people speak in the background as a clock ticktocks"], "sample_ids": ["slZLHwNbbt4", "vZAw4apG0Es"], 
"start_seconds": ["300", "30"], "properties": ["train, horn, sound", "background, clock, ticktocks"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a clock is ticking and people are talking"], "question": "which entity is a clock", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "people speak as gunfire rings out"], "sample_ids": ["uqFtmnhuqA8", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["a, b, c", "gunfire, ring, speak"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sK4u5T8hW78", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["a, car, pass", "airplane, boy, fly"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman speaks while a helicopter flies overhead "], "question": "which 
object is flying by", "label": 0}, {"captions": ["paper is repeatedly crumpled and crinkled", "water flows and trickles"], "sample_ids": ["vms5XGTDVQc", "tB7hWb9gTuQ"], "start_seconds": ["220", "30"], "properties": ["paper, crumpled, crinkled", "water, flow, trickle"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["paper is crumpled and crinkled", "water is splashing and gurgling"], "question": "which entity is flowing", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "winds blows roughly as a vehicle races past"], "sample_ids": ["tK4VlLsNxak", "xjvTpk2Zpr8"], "start_seconds": ["120", "70"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "wind, blows, vehicle"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "birds tweet and squawk"], "sample_ids": ["sLUnaPT5gM8", "w1mlz3Pe4fU"], "start_seconds": ["0", "300"], "properties": ["loud, laughter, intermittent", "squawk, tweet, scream"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "of a bird in a cage"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "birds are chirping and singing"], "question": "which entity is a bird", "label": 1}, {"captions": ["food is frying then a woman speaks", "several insects fly while two men talk"], "sample_ids": ["ukxt9I7eMMg", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["food, woman, speak", "several, fly, men"], "captions_pred_video": ["footage of 
a person preparing food on a stool in a kitchen", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["low humming with a clock ticking and birds chirping", "a infant makes noise and is excited"], "sample_ids": ["yVumC9TGknc", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["humming, clock, birds", "noise, excited, infant"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a series of beeps and chirps", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vYkA3cfXp5Q", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["speed, idle, accelerate", "three men, wind, flow"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["an engine is idling", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more like a movie", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["t25U-v4k4ts", "zl9Dqx-j7q4"], "start_seconds": ["40", "6"], "properties": ["bees buzz, birds chirp, man speaks", "engine, laugh, loud"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a jet engine roars "], "question": "which entity is louder", 
"label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a stream of water runs briefly"], "sample_ids": ["sxYkFKFIZD0", "x-PeY8Yb8M4"], "start_seconds": ["20", "300"], "properties": ["screech, man, door", "stream, water, run"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "people applaud and hoot and chat quietly"], "sample_ids": ["xfudFO976zE", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["animal, bleats, cry", "people, applaud, hoot"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "two men speak as a buffeting wind blows"], "sample_ids": ["vf9xf3vMsGM", "y8WEcpOlT3I"], "start_seconds": ["540", "40"], "properties": ["A man speaks while turning a water faucet on.", "wind, speak, buffeting"], "captions_pred_video": ["of the person washing their hands under the faucet", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a man is speaking with wind noise in the background "], "question": "which entity is indoors", "label": 0}, 
{"captions": ["a male speaks and another male speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["viuTg1M-dqg", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["two males, speaking, male", "background, motor, run"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yJ0TePmaOo", "zl9Dqx-j7q4"], "start_seconds": ["390", "6"], "properties": ["two hard objects, man, speak", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wyllXV6PjKo", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["a baby, a woman, a man", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a baby?", "label": 0}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vuUVPzd2FXw", "yDoT73BWsdA"], "start_seconds": ["160", "10"], "properties": ["a, steam, release", "engine, revs, vehicle"], "captions_pred_video": ["of the person 
cooking on the grill with a spatula", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["an infant crying as a woman laughs", "wind blows as people chatter quietly"], "sample_ids": ["xhmRY9yhC7c", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["a, laugh, infant", "wind, chatter, people"], "captions_pred_video": ["of a baby crying in a baby bouncer", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a cat meows and children speak", "an engine revs and a turning noise is made"], "sample_ids": ["x5cuQjOdM3E", "tOSWIURC-4"], "start_seconds": ["30", "0"], "properties": ["cat, speak, children", "noise, engine, revs"], "captions_pred_video": ["a black background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "a lawn mower is running "], "question": "which entity is making a noise", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a child speaks in closed space"], "sample_ids": ["sfAvvZwdLCY", "yW6FWLSLkx4"], "start_seconds": ["20", "40"], "properties": ["flushes, drains, water", "child, space, speak"], "captions_pred_video": ["footage of the toilet in the bathroom", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "a man speaks as a motor runs in the background"], "sample_ids": ["sDSppXIlJrs", "xZepNM9qcRA"], "start_seconds": ["27", "30"], 
"properties": ["microphone, water, wind", "background, motor, run"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "a woman speaks as she rubs two objects together"], "sample_ids": ["slZLHwNbbt4", "vzxHnu-SFEw"], "start_seconds": ["300", "80"], "properties": ["a, horn, run", "two objects, woman, speak"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a baby cries and a woman moans", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["smDKStoHBJo", 
"uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["a, cry, woman", "a, scream, girl"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["motors runs briefly and tires screech", "people cheer as a vehicle engine revs"], "sample_ids": ["yRx9txMcBl0", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["motors, tires, screech", "engine revs, vehicle, people"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a person snoring several times", "a clock ticktocks"], "sample_ids": ["spJCm8tD9Zo", "v-g-j2uTByM"], "start_seconds": ["90", "30"], "properties": ["snore, person, several", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a person is snoring loudly", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": 
["distant men speak as a spray can nozzle is depressed", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["rwtmaKiCcQU", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["nozzle, depressed, spray can", "water, radio, man"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["spraying and people speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water?", "label": 1}, {"captions": ["water splashes as an animal walks through", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["w1ir-sZ3Im8", "tDlysoZiA1I"], "start_seconds": ["90", "0"], "properties": ["animal, water, splashes", "animal, grunts, chirps"], "captions_pred_video": ["footage of a group of people riding horses through a river", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["water splashes and gurgles as people speak", "birds are chirping and a rooster is crowing "], "question": "which animal is making noise", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a car accelerates and wind blows"], "sample_ids": ["vddP56-ogds", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["water, flow, laugh", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "a machine beeps continuously"], "sample_ids": ["sofxkNWaP0s", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["wind, engine, louder", "beeps, machine, continuously"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the 
foreground", null], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a power tool runs and touches a surface"], "sample_ids": ["vh30P49Po6s", "zfvPRf3chY"], "start_seconds": ["30", "290"], "properties": ["loud, continuous, quacks", "power tool, run, touch"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking while a power tool is being used "], "question": "which is not a continuous action", "label": 1}, {"captions": ["a baby cries and a woman moans", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["smDKStoHBJo", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["a, cry, woman", "engine, idle, woman"], "captions_pred_video": ["a man holding a crying baby in his arms", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "water pouring and bubbling"], "sample_ids": ["vb1fPSDI4c", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["multiple, people, yell", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a crowd of people are talking and laughing", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "people cheer as a vehicle engine revs"], "sample_ids": ["wTideSjRFS0", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["food, sizzle, woman", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wTideSjRFS0", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["food, sizzle, woman", "three men, wind, flow"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman speaking as frying food sizzles?", "label": 0}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zgUgkpk78xU", "wz7N8YRy74I"], "start_seconds": ["70", "30"], "properties": ["clinking, humming, 
horn", "rooster, crow, background, men"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "water pouring and bubbling"], "sample_ids": ["ylpYOorfH4o", "uyRfq-jKPpo"], "start_seconds": ["410", "50"], "properties": ["engine, running, wind", "water, bubbles, pouring"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and an engine is revving", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["wtDqrBygTcU", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["man, engine, run", "harsh, wind, blows"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and a motor is running", "a man is speaking with wind noise in the background "], "question": "which entity is about a boat?", "label": 0}, {"captions": ["a kid speaks followed by music playing", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tQWGZLItBXk", "vfYTJq7nU"], "start_seconds": ["170", "130"], "properties": ["music, kid, speak", "rustling, ducks, quack"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "water pouring and bubbling"], "sample_ids": ["u21-Z5gJCB8", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["background, voice, man", "water, bubbles, pouring"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "on youtube how to do an afro puff hairstyle youtube 
how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "water is running from a faucet"], "question": "which entity is more active", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "a car accelerates and wind blows"], "sample_ids": ["wqADXCzngMw", "u0TrcHhkPQ"], "start_seconds": ["340", "20"], "properties": ["engine, idle, man", "accelerates, wind, blows"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", null], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["birds fly and flutter around", "paper is crumpling consistently"], "sample_ids": ["wGKgwOP3h30", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["fly, flutter, around", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of the pigeons in the coop", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["pigeons coo and flap their wings", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks uses a drill", "some men talk among st themselves as cars speed and race loudly"], 
"sample_ids": ["x5eIC7S0fbg", "uZesmtKZGSw"], "start_seconds": ["60", "250"], "properties": ["A man is speaking, uses a drill, and is a tool", "men, talk, cars"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and using a power tool ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about a tool", "label": 0}, {"captions": ["a woman speaks in a fast tone with a male", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sTpirNYo8vQ", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["a, tone, fast", "rustling, ducks, quack"], "captions_pred_video": ["of a man taking a selfie on a bus", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a duck quacks and a woman speaks"], "question": "which entity is about a duck?", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a infant makes noise and is excited"], "sample_ids": ["rqfQRErjfk8", "wIJK3-5y0kA"], "start_seconds": ["170", "30"], "properties": ["crowd, cheers, applauds", "noise, excited, infant"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a crowd of 
people clapping and cheering", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["an audience gives applause", "multiple people speak and children yell while water gurgles"], "sample_ids": ["x6iCUDmRpKQ", "vb1fPSDI4c"], "start_seconds": ["38", "30"], "properties": ["applause, audience, give", "multiple, people, yell"], "captions_pred_video": ["a black background with the moon and stars in the sky", null], "captions_pred_audio": ["a group of people are clapping and cheering", "a crowd of people are talking and laughing"], "question": "which entity is more active", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a man laughs and speaks as cats purr and hiss"], "sample_ids": ["zY3icUyMdh8", "vVhthZ45k3Y"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "cat, purr, hiss"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage is blurry and out of focus"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking and a cat is meowing"], "question": "which entity is more playful", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["uPDn2BFTHk", "zl9Dqx-j7q4"], "start_seconds": ["140", "6"], "properties": ["woman, laughs, speaks", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["loud, continuous burping", "multiple people speak and children yell while water gurgles"], "sample_ids": ["y636gklDioE", "vb1fPSDI4c"], "start_seconds": ["20", "30"], "properties": ["loud, continuous, burping", "multiple, people, yell"], "captions_pred_video": ["a dog sitting on a 
red chair in front of an old telephone", null], "captions_pred_audio": ["a person burps loudly several times", "a crowd of people are talking and laughing"], "question": "which entity is quieter", "label": 1}, {"captions": ["a car accelerates and wind blows", "a man speaks as a car is passing by"], "sample_ids": ["u0TrcHhkPQ", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["accelerates, wind, blows", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which car is moving faster", "label": 0}, {"captions": ["a woman sneezes then speaks", "birds chirp and objects are moved around"], "sample_ids": ["x4dZyf9Gbj0", "yPUYU6t3rwo"], "start_seconds": ["130", "370"], "properties": ["sneezes, speaks, woman", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage is blurry and out of focus", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman sneezes and speaks", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a toilet flushes and water drains"], "sample_ids": ["tEE3MpBt1sg", "sfAvvZwdLCY"], "start_seconds": ["50", "20"], "properties": ["drill, something, laugh", "water drains, flushes, water"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of the toilet in the bathroom"], 
"captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "an airplane engine runs"], "sample_ids": ["vbr9mHKc8WM", "yVPZ2MNWpms"], "start_seconds": ["40", "0"], "properties": ["noise, loudness, engine", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["an engine is idling", "a car is driving by on the road "], "question": "which entity is a machine", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a dog barks and whimpers"], "sample_ids": ["zj2R0XoFr5k", "sShpyu2l4YQ"], "start_seconds": ["50", "0"], "properties": ["airplane, boy, fly", "barks, whimpers, dog"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "the puppies are playing with a toy"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a dog is barking and growling"], "question": "which entity is a dog?", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["y2bVZ7rz-5M", "w5W5Kqtc8E"], "start_seconds": ["280", "100"], "properties": ["motor noise, horn, siren", "wind, blow, vehicle"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["food is frying then a woman speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["ukxt9I7eMMg", 
"vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["food, woman, speak", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["an airplane accelerates briefly", "dishes cling together then a man begins to speak"], "sample_ids": ["zjTG0gaGCUI", "sQGXqGcwOTc"], "start_seconds": ["80", "3"], "properties": ["accelerates, airplane, briefly", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a jet engine roars as wind blows ", "mechanisms are operating and water is splashing "], "question": "which entity is a moving object", "label": 0}, {"captions": ["a toilet flushes and water drains", "a man speaks while turning a water faucet on"], "sample_ids": ["sfAvvZwdLCY", "vf9xf3vMsGM"], "start_seconds": ["20", "540"], "properties": ["water drains, flushes, water", "A man speaks while turning a water faucet on."], "captions_pred_video": ["footage of the toilet in the bathroom", "of the person washing their hands under the faucet"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while water is running in the background"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "water pouring and bubbling"], "sample_ids": ["y1saVTXsKwc", "uyRfq-jKPpo"], "start_seconds": ["80", "50"], "properties": ["a, dog, talk", "water, bubbles, pouring"], "captions_pred_video": ["a dog playing with a pink ball", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an 
afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a dog barks and a man speaks", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an audience gives applause as a man yells and a group sings", "vehicles pass by on a roadway"], "sample_ids": ["tdWhHV3X25Q", "tgbONvsP47Y"], "start_seconds": ["60", "0"], "properties": ["applause, audience, yells", "pass, vehicle, roadway"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a person sniffles and sneezes", "sucking and grunting followed by slurping with birds in the background"], "sample_ids": ["uRlbY6aoBU", "yYEVLuqEytU"], "start_seconds": ["0", "40"], "properties": ["sneezes, sniffles, person", "grunt, slurp, background"], "captions_pred_video": [null, "a baby goat is being petted by a person's hand"], "captions_pred_audio": ["a man is sneezing ", "several sheep bleat and a man speaks"], "question": "which entity is a person", "label": 0}, {"captions": ["a beep repeats multiple times", "a car speeding up in the distance"], "sample_ids": ["y682ml90jGw", "u0TrcHhkPQ"], "start_seconds": ["11", "20"], "properties": ["beep, repeat, multiple", "distance, car, speed"], "captions_pred_video": [null, null], 
"captions_pred_audio": ["a beeping sound is being made ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vBHyYJ8pL0", "w5W5Kqtc8E"], "start_seconds": ["2", "100"], "properties": ["noise, door, opening", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is accompanied by a door opening and closing", "label": 0}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a woman and man speak while food is frying"], "sample_ids": ["w8uLijTqtlU", "zk-xJGQU8-4"], "start_seconds": ["70", "130"], "properties": ["wind, microphone, noise", "food, man, woman"], "captions_pred_video": ["footage is blurry and shaky", "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["the wind is blowing strongly", "a woman is speaking while dishes are clanging and music is playing in the background "], "question": "which entity is a video of a man and woman speaking?", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["y2bVZ7rz-5M", "vbZ-0lGPneg"], "start_seconds": ["280", "30"], "properties": ["motor noise, horn, siren", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a horn?", "label": 0}, {"captions": ["a man speaks over a radio as wind 
blows and water splashes", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tDVADusiIoc", "zj2R0XoFr5k"], "start_seconds": ["60", "50"], "properties": ["water, radio, man", "airplane, boy, fly"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about flying?", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "water flows as a woman laughs and a man speaks"], "sample_ids": ["xvDdE3zNf8Y", "vddP56-ogds"], "start_seconds": ["120", "30"], "properties": ["A, crumple, paper", "water, flow, laugh"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", null], "captions_pred_audio": ["a woman speaks and crumples paper", "water is running and gurgling and a man is speaking"], "question": "which entity is a video of a woman speaking and crumpling paper?", "label": 0}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "a car speeding up in the distance"], "sample_ids": ["yNtRmrn0io8", "u0TrcHhkPQ"], "start_seconds": ["210", "20"], "properties": ["storm, distance, strike", "distance, car, speed"], "captions_pred_video": ["footage of a house in the middle of the night", null], "captions_pred_audio": ["rain falls and thunder roars", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["children speak and play together", "a clock ticktocks"], "sample_ids": ["yVVP8XvWJTo", "v-g-j2uTByM"], "start_seconds": ["260", "30"], "properties": ["children, speak, play", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a playground at a school or daycare center", "in your own words a 
cuckoo clock hanging on the wall"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "birds chirp quietly and an adult man speaks"], "sample_ids": ["yRx9txMcBl0", "zuua6-5goWw"], "start_seconds": ["40", "30"], "properties": ["accelerates, tires, squeals", "birds, chirp, quiet, man, speaks"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot"], "captions_pred_audio": ["a car is revving its engine and skidding ", "birds are chirping and a man is speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "engines sputter roughly and tires squeal"], "sample_ids": ["se87d6yxEOA", 
"zhx6hoYrHeI"], "start_seconds": ["10", "160"], "properties": ["run, whistle, pass", "engine, sputter, rough"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "footage of a man working on a motorcycle's tire"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a car accelerates and revs its engine "], "question": "which engine is rougher", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "a man speaks while a machine runs before a smoke alarm beeps"], "sample_ids": ["wudZTNBtVqc", "sG7TyPnFDR0"], "start_seconds": ["60", "180"], "properties": ["accelerates, engine, wind", "beeps, machine, smoke alarm"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "a person is using an espresso machine in a restaurant"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and a microwave oven is beeping "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a helicopter engine runs", "people cheer as a vehicle engine revs"], "sample_ids": ["t5ZbXbniOWk", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["engine, helicopter, run", "engine revs, vehicle, people"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a helicopter is flying overhead ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["birds coo incessantly", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yZrFNS7GFBQ", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["coo, bird, incessant", "rooster, crow, background, men"], "captions_pred_video": ["of the bird in the cage", "footage of the sun shining through the clouds 
on a cloudy day"], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a bird", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vveS8HT7Uog", "tDVADusiIoc"], "start_seconds": ["100", "60"], "properties": ["a man, objects, speak", "water, radio, man"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which man is speaking over a radio?", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["wTideSjRFS0", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["food, sizzle, woman", "wind, blow, vehicle"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["an airplane engine spools and people speak", "a stream of water runs briefly"], "sample_ids": ["wTjoRj1se3U", "x-PeY8Yb8M4"], "start_seconds": ["390", "300"], "properties": ["airplane, engine, spool", "stream, water, run"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a jet engine is running and people are talking", "a car is driving on a wet road "], "question": "which entity is a stream of 
water?", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "a toilet flushes and a female speaks"], "sample_ids": ["u5RmF3c3Aw", "yaln9y8I7ms"], "start_seconds": ["60", "230"], "properties": ["engine, car, zoom", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yVumC9TGknc", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["humming, clock, birds", "a woman, something, fried"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a series of beeps and chirps", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vddP56-ogds", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["liquid, laughs, man", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a woman is speaking and a baby is crying"], "question": "which entity has a man talking?", "label": 0}, {"captions": ["females talk and laugh over gusting wind", "a person sniffles and then sneezes in the distance"], "sample_ids": ["un9VQlzgZM", "uRlbY6aoBU"], "start_seconds": ["5", "0"], "properties": ["females, talk, laugh", "a, distance, sneeze"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and 
laughing with wind noise and breathing in the background ", "a man is sneezing "], "question": "which entity is about a person?", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "a propeller rotates loudly and intensely"], "sample_ids": ["tqR406bGiE", "ugHJF0hfYkg"], "start_seconds": ["40", "10"], "properties": ["flush, water, gurgle", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a toilet is flushed", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a man speaks over intermittent keyboard taps", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["tw76HGONaKg", "sapQIQUhFc"], "start_seconds": ["570", "280"], "properties": ["audio, man, keyboard", "liquid, flow, distance"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man is speaking and a stream is flowing in the background "], "question": "which entity is more distant", "label": 1}, {"captions": ["a crowd yells, reacts and 
applauds", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wztCSUxOf8", "wz7N8YRy74I"], "start_seconds": ["130", "30"], "properties": ["a crowd, yells, applauds", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "a woman speaks as she rubs two objects together"], "sample_ids": ["sncRqQ67iJU", "vzxHnu-SFEw"], "start_seconds": ["460", "80"], "properties": ["loud, repeatedly, man", "two objects, woman, speak"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a person is snoring", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren 
wailing", "someone snores nearby"], "sample_ids": ["wy1eKjR7KC0", "spJCm8tD9Zo"], "start_seconds": ["30", "90"], "properties": ["people, talk, distance", "someone snores, nearby, someone"], "captions_pred_video": ["two police officers riding motorcycles down the street", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a person is snoring loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["someone is burping continuously", "a child speaks in closed space"], "sample_ids": ["y636gklDioE", "yW6FWLSLkx4"], "start_seconds": ["20", "40"], "properties": ["burps, burps, burps", "child, space, speak"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person burps loudly several times", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is not speaking?", "label": 0}, {"captions": ["sirens ring and approach with humming of distant traffic", "water is sprayed across a hard surface"], "sample_ids": ["xERFUeZONz8", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["ring, approach, traffic", "water, spray, surface"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["an emergency vehicle siren blares", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["some clanking with distant murmuring", "people applaud and hoot and chat quietly"], "sample_ids": ["uMTTDZ2mb4", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["clanking, murmuring, distant", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", 
"people are clapping and speaking with background noise "], "question": "which entity is more likely to be a performance", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "birds chirp and objects are moved around"], "sample_ids": ["wqADXCzngMw", "yPUYU6t3rwo"], "start_seconds": ["340", "370"], "properties": ["engine, idle, man", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["wjsXBsc7M40", "zFjIWfSD-4"], "start_seconds": ["10", "410"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "People, motor, brakes"], "captions_pred_video": ["footage of the baby playing with a toothbrush", null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tDlfY3nmx1A", "vfYTJq7nU"], "start_seconds": ["160", "130"], "properties": ["applause, laugh, man", "rustling, ducks, quack"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", null], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["after a few seconds of silence, a 
loud bang occurs followed by a softer banging noise", "a child speaks"], "sample_ids": ["zkKdxzNC97Y", "yW6FWLSLkx4"], "start_seconds": ["27", "40"], "properties": ["loud, bang, noise", "a, child, speaks"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a door is opened and closed", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is silent", "label": 0}, {"captions": ["a propeller moves loudly nearby", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["ugHJF0hfYkg", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["loud, propeller, move", "a, scream, girl"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is quieter", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zY3icUyMdh8", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["dog, bark, engine", "a woman, laughs, animal"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a person", "label": 1}, {"captions": ["paper folding and crinkling", "water flows as men speak and yell"], "sample_ids": ["zPpG3RD8lSs", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["paper, fold, crinkle", "water, flow, men"], "captions_pred_video": ["how to make a valentine's day card out 
of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is moving", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "some tunes played by whistling"], "sample_ids": ["x6ijhqRY38s", "u6BnG6YZqJ4"], "start_seconds": ["250", "0"], "properties": ["something metal, glass, hit", "tune, play, whistling"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["an engine runs and a man speaks", "paper is crumpling consistently"], "sample_ids": ["yT5WfYMRr-U", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["engine, run, man", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage of the person 
holding a pair of scissors"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "people cheer as a vehicle engine revs"], "sample_ids": ["vJvryTwuAV8", "xjhAnI2q6hM"], "start_seconds": ["16", "6"], "properties": ["audience, cheer, man", "engine revs, vehicle, people"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "some tunes played by whistling"], "sample_ids": ["sNB8zxXneIM", "u6BnG6YZqJ4"], "start_seconds": ["20", "0"], "properties": ["several, quack, cocks", "tune, play, whistling"], "captions_pred_video": ["a group of geese in a cage", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "pigeons vocalize and birds chirp"], "sample_ids": ["vYkA3cfXp5Q", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["speed, idle, accelerate", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "of the pigeon in the cage"], "captions_pred_audio": ["an engine is idling", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a girl talking, laughing and sneezing noise", "a person speaks over rustling leaves"], "sample_ids": 
["y4tPJXBKDig", "zOZleIRqZm4"], "start_seconds": ["20", "80"], "properties": ["a, noise, talk", "rustling, leaves, person"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a man is speaking with crickets chirping in the background"], "question": "which entity is a person speaking over rustling leaves?", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vZAw4apG0Es", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["background, clock, ticktocks", "airplane, boy, fly"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a clock is ticking and people are talking", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a church bell rings several times", "a car accelerates and wind blows"], "sample_ids": ["sUVVjE3Ucp8", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["ring, bell, several", "accelerates, wind, blows"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", null], "captions_pred_audio": ["a church bell is ringing ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a toilet flushes and water drains", "a man speaks as a car is passing by"], "sample_ids": ["sfAvvZwdLCY", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "a, car, pass"], "captions_pred_video": ["footage of the toilet in the bathroom", "for 2007 hyundai accent 1 
6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "water flows and trickles"], "sample_ids": ["uoGVs9yUqY4", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["multiple, vocalize, wind", "water, flow, trickle"], "captions_pred_video": ["for how to make a wooden shed door youtube", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a male speaks over some small clicks", "a person is snoring while sleeping"], "sample_ids": ["uXxVebHsGZ8", "vJrjSeP17yE"], "start_seconds": ["30", "40"], "properties": ["male, clicks, speak", "a person is sleeping, snoring, person"], "captions_pred_video": [null, "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a person snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["male speech with light ticking", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["xO-Q2BlIIPU", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["male, speech, ticking", "rooster, crow, background, men"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 
12 31 2016", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "an electric engine works nearby followed by a child talking"], "sample_ids": ["s59PfAghdkM", "xSKJGCItUWE"], "start_seconds": ["0", "10"], "properties": ["bird, chirp, background, horse, neigh", "engine, work, child"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "footage of the helicopter flying in the room"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a high pitched engine is running and a child speaks"], "question": "which entity has a child talking?", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a vehicle is skidding and squealing tires"], "sample_ids": ["ylpYOorfH4o", "soTOh3zYJfY"], "start_seconds": ["410", "40"], "properties": ["motor, run, steady", "vehicle, skid, tires"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a red car drifting on a winding road with smoke coming out of it"], 
"captions_pred_audio": ["a man is speaking and an engine is revving", "a race car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["insects humming with a dog barking and small goat bleating", "a woman speaks happily and an animal chirps"], "sample_ids": ["tIY7qOV3rEM", "uWAAAL4CIoc"], "start_seconds": ["0", "0"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "a woman, chirps, animal"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a woman is speaking and a dog is barking "], "question": "which animal is speaking", "label": 0}, {"captions": ["a man speaks as a motor runs in the background", "wind blows as people chatter quietly"], "sample_ids": ["xZepNM9qcRA", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["background, motor, run", "wind, chatter, people"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "footage is blurry and out of focus"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a man speaks as a vehicles passes by then a woman speaks"], "sample_ids": ["vcmWSmvti8", "siJFXfGWgDk"], "start_seconds": ["30", "50"], "properties": ["music, man, fire", "man, woman, vehicle"], "captions_pred_video": [null, "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a man is speaking and birds are chirping in the background "], "question": "which entity has a man speaking as music plays before artillery is fired?", "label": 0}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a train 
whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["su6FAOcOA8c", "ukg5L09Wpvo"], "start_seconds": ["4", "150"], "properties": ["engine, idle, woman", "clickety-clack, train, whistle"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a dog barks and whimpers", "people speak as gunfire rings out"], "sample_ids": ["sShpyu2l4YQ", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["barks, whimpers, dog", "gunfire, ring, speak"], "captions_pred_video": ["the puppies are playing with a toy", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["someone whistles a song", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sIXTftIuUgw", "wDVMhEdTiVw"], "start_seconds": ["90", "30"], "properties": ["someone, song, whistle", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a person whistling a song", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["wind blows and people talk while livestock vocalizes", "an engine runs loudly"], "sample_ids": ["vXlk0lIQBFo", "vqZuVbG6-HI"], "start_seconds": ["470", "130"], "properties": ["wind, talk, vocalize", "loud, engine, run"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage is blurry because it's raining outside"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and 
laughing ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks uses a drill", "a woman speaks as she rubs two objects together"], "sample_ids": ["x5eIC7S0fbg", "vzxHnu-SFEw"], "start_seconds": ["60", "80"], "properties": ["A man is speaking, uses a drill, and is a tool", "two objects, woman, speak"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and using a power tool ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a tool", "label": 0}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["ylpYOorfH4o", "wqZ135Ssz0"], "start_seconds": ["410", "60"], "properties": ["motor, run, steady", "two men, woman, birds"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge 
ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more moving parts", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a man speaks as a car is passing by"], "sample_ids": ["ugHJF0hfYkg", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["loud, propeller, move", "a, car, pass"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking with background noise and breathing sounds "], "question": "which is not a moving object", "label": 1}, {"captions": ["small dogs yip and bark sharply", "some men converse over an engine running"], "sample_ids": ["v-wcQf4BDY0", "sCiy7QS1U"], "start_seconds": ["120", "300"], "properties": ["bark, yip, sharply", "men, converse, engine"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", null], "captions_pred_audio": ["a 
dog barks and growls", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more quiet", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "winds blows roughly as a vehicle races past"], "sample_ids": ["weDbePuc-Xc", "xjvTpk2Zpr8"], "start_seconds": ["40", "70"], "properties": ["music, slaps, human", "wind, blows, vehicle"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a small engine spits as it runs", "a airplane flies overhead as a woman speaks"], "sample_ids": ["sZvwOuuPGP0", "zj2R0XoFr5k"], "start_seconds": ["50", "50"], "properties": ["spits, engine, runs", "airplane, fly, woman"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a medium engine is running ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "a man speaks over intermittent keyboard taps"], "sample_ids": ["sDSppXIlJrs", "tw76HGONaKg"], "start_seconds": ["27", "570"], "properties": ["microphone, water, wind", "audio, man, keyboard"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda 
ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a man speaks and types on a computer keyboard "], "question": "which entity is a recording of a man speaking?", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "wind blowing followed by a zoom"], "sample_ids": ["tDlfY3nmx1A", "vr8ZXjEBhMQ"], "start_seconds": ["160", "150"], "properties": ["applause, laugh, man", "wind, blow, zoom"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a man speaks as a car is passing by"], "sample_ids": ["vhJWZheqaE", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["water drains unevenly, toilet flushes, water drains", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a stream of water flows as people talk and wind blows"], "sample_ids": ["uEU-Hg5MTN8", "xBxDz0CFVn0"], "start_seconds": ["27", "30"], "properties": ["a woman, laughs, animal", "stream, water, flow"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "a child speaks in closed space"], "sample_ids": ["sQwlkXjQabo", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["liquid, surface, spray", "child, space, speak"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["spraying followed by silence", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a machine beeps continuously", "a machine beeps continuously"], "sample_ids": ["y682ml90jGw", "y682ml90jGw"], "start_seconds": ["11", "11"], "properties": ["beeps, machine, continuously", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a beeping sound is being made "], "question": "which machine beeps continuously", "label": 1}, {"captions": ["birds chirp then an animal grunts", "paper is crumpling consistently"], "sample_ids": ["tDlysoZiA1I", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["animal, grunt, chirp", "paper is crumpling, 
paper is white, paper is crumpling"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["tOj4tdLRaA", "xKB8O8LTs6s"], "start_seconds": ["70", "70"], "properties": ["woman, laugh, baby", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a baby laughs and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["plastic is tapped on while someone speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wvKpEYswXO0", "vfYTJq7nU"], "start_seconds": ["150", "130"], "properties": ["plastic, tap, speak", "rustling, ducks, quack"], "captions_pred_video": ["of the person preparing food in the kitchen", null], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a duck quacks and a woman speaks"], "question": "which entity is about a duck?", "label": 1}, {"captions": ["a person screams glaringly", "vehicles pass by on a roadway"], "sample_ids": ["xC8kbrKJmco", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["glaringly, screams, person", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a goat is bleating ", "a car is driving on the road "], "question": "which entity is more passive", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a vehicle engine revs as the 
vehicle passes"], "sample_ids": ["tOSWIURC-4", "yDoT73BWsdA"], "start_seconds": ["0", "10"], "properties": ["engine, work, nearby", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a lawn mower is running ", "a race car accelerates and revs its engine "], "question": "which entity has a vehicle passing by?", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "a man speaks while a rooster crows and other people speak in the background"], "sample_ids": ["s3cTDAj31g", "wz7N8YRy74I"], "start_seconds": ["80", "30"], "properties": ["man, talk, woman", "rooster, crow, background, people"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["water flows and trickles", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tB7hWb9gTuQ", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["water, flow, trickle", "a woman, something, fried"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "- a woman cooking in the kitchen"], "captions_pred_audio": ["water is splashing and gurgling", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of something being fried?", "label": 1}, {"captions": ["speaking following by laughing and clapping", "dishes cling together then a man begins to speak"], "sample_ids": ["u2f5NpsoHBg", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["person, laugh, clap", "cling, speak, dishes"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "a man washing dishes in a commercial 
kitchen"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "mechanisms are operating and water is splashing "], "question": "which entity shows a person speaking and laughing and clapping?", "label": 0}, {"captions": ["a woman talking as an infant is crying", "wind blows as people chatter quietly"], "sample_ids": ["tMbMDvT50j8", "xBxDz0CFVn0"], "start_seconds": ["12", "30"], "properties": ["a, talk, infant", "wind, chatter, people"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zuua6-5goWw", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["birds, chirp, quiet, man, speaks", "a woman, laughs, animal"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a person sniffs and sneezes", "a small airplane approaches and then 
flies by, after and during which a boy speaks"], "sample_ids": ["uRlbY6aoBU", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["sneezes, person, sniffs", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is sneezing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "a child speaks in closed space"], "sample_ids": ["xjhAnI2q6hM", "yW6FWLSLkx4"], "start_seconds": ["6", "40"], "properties": ["wind, blow, loudly", "child, space, speak"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a car speeding up in the distance"], "sample_ids": ["xBxDz0CFVn0", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["wind, chatter, people", "distance, car, speed"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "people speak as gunfire rings out"], "sample_ids": ["wwyfGO2J4", "wqTCwqVRDlk"], "start_seconds": ["90", "80"], "properties": ["people, applaud, hoot", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["people are clapping and speaking with background noise 
", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["several insects fly while two men talk", "waves crash against a shoreline and people speak"], "sample_ids": ["s-T9OVOiMLo", "yFB25fqfU8I"], "start_seconds": ["330", "300"], "properties": ["several, fly, men", "wave, crash, shoreline"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "a toilet flushes and a female speaks"], "sample_ids": ["se87d6yxEOA", "yaln9y8I7ms"], "start_seconds": ["10", "230"], "properties": ["run, whistle, pass", "female, flushes, toilet"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "footage is blurry and out of focus"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a toilet flushes and a man speaks"], "question": "which entity is a human speaking?", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["ukxt9I7eMMg", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["continuous, woman, speaking", "wind, blow, vehicle"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a woman speaking towards the end?", "label": 0}, {"captions": ["a helicopter engine runs continuously", "someone whistles a tune"], 
"sample_ids": ["ugHJF0hfYkg", "sIXTftIuUgw"], "start_seconds": ["10", "90"], "properties": ["engine, running, continuously", "someone, tune, whistle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a person whistling a song"], "question": "which is not a continuous action", "label": 0}, {"captions": ["a person sneezes followed by another person speaking", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["t8CV69hcvF0", "wyllXV6PjKo"], "start_seconds": ["210", "30"], "properties": ["person, sneeze, follow", "a baby, a woman, a man"], "captions_pred_video": ["of an airplane flying in the dark sky at night", null], "captions_pred_audio": ["a woman sneezes and speaks", "a woman speaks and a baby cries"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "water pouring and bubbling"], "sample_ids": ["uOpoD0gGXcs", "uyRfq-jKPpo"], "start_seconds": ["120", "50"], "properties": ["chirps, woman, bird", "water, bubbles, pouring"], "captions_pred_video": ["a herd of cows grazing in the field", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["birds are chirping and a 
man is speaking", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "a man speaks uses a drill"], "sample_ids": ["w-4gHptFNuU", "x5eIC7S0fbg"], "start_seconds": ["21", "60"], "properties": ["engine revs, accelerates, bump", "A man is speaking, uses a drill, and is a tool"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "a person in surgical gloves is using a needle to remove a small object from a tooth"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and using a power tool "], "question": "which entity is a tool", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["xfaoyyzw2WU", "xKB8O8LTs6s"], "start_seconds": ["180", "70"], "properties": ["loud, jet engine, roar", "music, gunfire, explosion"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["wRBHTgrbiwg", "vbZ-0lGPneg"], "start_seconds": ["50", "30"], "properties": ["bird, owl, speak", "a woman, a television program, a bird"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": ["sawing of wood and rustling 
with leaves blowing in the distance", "wind blowing followed by a zoom"], "sample_ids": ["uiItxDsDMFI", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["sound, distance, leaves", "wind, blow, zoom"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a saw is being used with background noise ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom?", "label": 1}, {"captions": ["an engine starts and increases in power", "waves crash against a shoreline and people speak"], "sample_ids": ["zjTG0gaGCUI", "yFB25fqfU8I"], "start_seconds": ["80", "300"], "properties": ["power, increase, engine", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["tK4VlLsNxak", "rqu8iB22IY"], "start_seconds": ["120", "5"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "sound, repeats, laugh"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a dog barks and a man speaks while music plays "], "question": "which entity is more like a scream", "label": 1}, {"captions": ["humming of idling and revving engine with a man speaking", "a stream of water runs briefly"], "sample_ids": ["wqADXCzngMw", "x-PeY8Yb8M4"], "start_seconds": ["340", "300"], "properties": ["audio, humming, revving", "stream, water, run"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "a man sitting on a rock in the middle of a 
river"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a clock ticktocks continuously", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vlJS7LN2XyM", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["ticktocks, clock, ticktocks continuously", "loud, jet engine, roar"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a ticktock of a clock", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "a person snores loudly multiple times at a close distance"], "sample_ids": ["skd2PphS6oI", "sSMl2vc3ek"], "start_seconds": ["190", "20"], "properties": ["ring, bird, vocalize", "loud, multiple, distance"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", null], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["xyx6eNVEYRY", "xjhAnI2q6hM"], "start_seconds": ["380", "6"], "properties": ["loud, engine, muffles", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["an infant crying and a woman 
speaking with some distant murmuring", "wind blows as people chatter quietly"], "sample_ids": ["smDKStoHBJo", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["a, infant, speaking", "wind, chatter, people"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a machine beeps continuously"], "sample_ids": ["yaln9y8I7ms", "y682ml90jGw"], "start_seconds": ["230", "11"], "properties": ["female, flushes, toilet", "beeps, machine, continuously"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and a man speaks", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "birds chirp and objects are moved around"], "sample_ids": ["vJ7JPEFhyLA", "yPUYU6t3rwo"], "start_seconds": ["16", "370"], "properties": ["three men, wind, flow", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "an airplane engine runs"], "sample_ids": ["vddP56-ogds", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["water, splash, person, laugh", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["water is running and gurgling and a man 
is speaking", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wyllXV6PjKo", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["a baby, a woman, a man", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about a baby?", "label": 0}, {"captions": ["here comes the train and it starts to blow the horn and get close", "people speak as gunfire rings out"], "sample_ids": ["s7knHCFW82w", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["blow horn, get close, train", "gunfire, ring, speak"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "a door opens and closes"], "sample_ids": ["yswmmRZFItk", "vBHyYJ8pL0"], "start_seconds": ["0", "2"], "properties": ["background, frog, croak", "open, close, door"], "captions_pred_video": ["a close up of a frog in the water", null], "captions_pred_audio": ["a frog is croaking", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is a door?", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "a infant makes noise and is excited"], "sample_ids": ["s7knHCFW82w", "wIJK3-5y0kA"], 
"start_seconds": ["30", "30"], "properties": ["blow horn, get close, train", "noise, excited, infant"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a baby cries and a woman speaks"], "question": "which is louder", "label": 0}, {"captions": ["a man speaks while rain falls onto a hard surface", "water flows and trickles"], "sample_ids": ["wqN6IIHw3po", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["rain, surface, fall", "water, flow, trickle"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and water is splashing", "water is splashing and gurgling"], "question": "which entity is flowing", "label": 1}, {"captions": ["a dog barks and whimpers", "water splashes as an animal walks through"], "sample_ids": ["sShpyu2l4YQ", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["barks, whimpers, dog", "animal, water, splashes"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a dog is barking and growling", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "a helicopter engine runs continuously"], "sample_ids": ["vZAw4apG0Es", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["people, clock, converse", "engine, running, continuously"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a clock is ticking 
and people are talking", "a helicopter is flying overhead "], "question": "which entity is running continuously", "label": 1}, {"captions": ["an animal quacks rapidly", "a man speaks as a car is passing by"], "sample_ids": ["vh30P49Po6s", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["animal, quacks, rapidly", "a, car, pass"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "a child speaks in closed space"], "sample_ids": ["tDVADusiIoc", "yW6FWLSLkx4"], "start_seconds": ["60", "40"], "properties": ["man, radio, blows", "child, space, speak"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a duck quacks continuously"], "sample_ids": ["tQWGZLItBXk", "vh30P49Po6s"], "start_seconds": ["170", "30"], "properties": ["voice, music, whoosh", "quacks, continuously, duck"], "captions_pred_video": ["worms revolution screenshots", "of a man brushing his 
teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a duck is quacking loudly"], "question": "which entity is a duck?", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "water splashes as an animal walks through"], "sample_ids": ["s3cTDAj31g", "w1ir-sZ3Im8"], "start_seconds": ["80", "90"], "properties": ["man, talk, woman", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and a baby is crying", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yeFvk9x0wWI", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["clack, bird, chirp", "three men, wind, flow"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a liquid flowing?", "label": 1}, {"captions": ["a clock ticktocks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["v-g-j2uTByM", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["ticktocks, clock, ticktocks", "loud, multiple, distance"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", null], "captions_pred_audio": ["a clock is ticking loudly", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["water bubbles and gurgles.", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["tB7hWb9gTuQ", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], 
"properties": ["bubbles, gurgles, water", "gun, shoot, water"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["water is splashing and gurgling", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a gun", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a man speaks as a car is passing by"], "sample_ids": ["u--KhUW8l1Y", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["horn, siren, life", "a, car, pass"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a person is burping while a girl speaks", "people speak as gunfire rings out"], "sample_ids": ["vdoxuJn9lTc", "wqTCwqVRDlk"], "start_seconds": ["40", "80"], "properties": ["person, burp, girl", "gunfire, ring, speak"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "a small airplane approaches and then flies by, 
after and during which a boy speaks"], "sample_ids": ["tgbONvsP47Y", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["noise, truck, accelerate", "airplane, boy, fly"], "captions_pred_video": ["footage of a fire truck entering a garage", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a car is driving on the road ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "a woman speaks happily and an animal chirps"], "sample_ids": ["wPz6QRAkEb4", "uWAAAL4CIoc"], "start_seconds": ["60", "0"], "properties": ["chirps, tweets, song", "a woman, chirps, animal"], "captions_pred_video": ["a bird in a cage on top of a pole", null], "captions_pred_audio": ["birds are chirping in the background ", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "a child speaks in closed space"], "sample_ids": ["wvKpEYswXO0", "yW6FWLSLkx4"], "start_seconds": ["150", "40"], "properties": ["water, tap, run", "child, space, speak"], "captions_pred_video": ["of the person preparing food in the kitchen", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "a man speaks as a motor runs in the background"], "sample_ids": ["s7knHCFW82w", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["blow horn, get close, train", "background, motor, run"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the 
side of the road", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a man speaking to a background?", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["wz7N8YRy74I", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["rooster, crow, background, people", "a, scream, girl"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "someone is typing on a computer keyboard"], "sample_ids": ["wSVhSdj0F0", "v0x1odnXtP0"], "start_seconds": ["10", "210"], "properties": ["horn honks, keys jingle, slam", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a person is typing on a keyboard"], "question": "which entity is typing on a computer keyboard", "label": 1}, {"captions": ["a person speaks briefly", "continuous sneezing together with speech"], "sample_ids": ["zOZleIRqZm4", "x4dZyf9Gbj0"], "start_seconds": ["80", "130"], "properties": ["person, talk, brief", "continuous, sneeze, speech"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman sneezes and speaks"], "question": "which entity 
is more like a person talking", "label": 0}, {"captions": ["a vehicle engine revs and tires squeal", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["yDoT73BWsdA", "xfaoyyzw2WU"], "start_seconds": ["10", "180"], "properties": ["engine revs, tires squeal, vehicle", "loud, jet engine, roar"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a man speaks as a motor runs in the background"], "sample_ids": ["yYEVLuqEytU", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["grunt, slurp, background", "background, motor, run"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a frog vocalizes as birds chirp"], "sample_ids": ["yajyRTUQk3U", "wqUmIEzuNz4"], "start_seconds": ["400", "30"], "properties": ["noise, woman, speak", "frog, bird, vocalize"], "captions_pred_video": ["- a woman cooking in the kitchen", "a frog sitting in the grass on a sunny day"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a cat meows and rustles"], "question": "which entity is a frog", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a helicopter engine idles continuously"], "sample_ids": ["ugHJF0hfYkg", "ugHJF0hfYkg"], "start_seconds": ["10", "10"], "properties": ["loud, intense, propeller", "engine, idle, 
continuously"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a helicopter is flying overhead ", "a helicopter is flying overhead "], "question": "which entity is quieter", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["yZrFNS7GFBQ", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["pigeon, buzzes, insect", "clickety-clack, train, whistle"], "captions_pred_video": ["of the bird in the cage", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["an owl hoots in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "waves crash against a shoreline and wind blows"], "sample_ids": ["uJV8NDaHqqk", "zdYdyF9-m8U"], "start_seconds": ["100", "7"], "properties": ["loud, fly, chirp", "wind, crash, shoreline"], "captions_pred_video": ["a bee hive in a wooden box", "a person kayaking in the ocean near a cliff"], "captions_pred_audio": ["a swarm of bees buzzing around", "waves crash and wind blows "], "question": "which entity is more quiet", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vJ7JPEFhyLA", "w5W5Kqtc8E"], "start_seconds": ["16", "100"], "properties": ["three men, wind, flow", "wind, blow, vehicle"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has more wind blowing", "label": 1}, {"captions": ["speaking following by 
laughing and clapping", "paper folding and crinkling"], "sample_ids": ["u2f5NpsoHBg", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["person, laugh, clap", "paper, fold, crinkle"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "the wind blows and a mouse clicks "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["su6FAOcOA8c", "rqu8iB22IY"], "start_seconds": ["4", "5"], "properties": ["engine, run, woman", "sound, repeats, laugh"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", null], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a dog barks and a man speaks while music plays "], "question": "which entity has a woman making an announcement?", "label": 0}, {"captions": ["a woman speaks with water running", "a toilet flushes and a female speaks"], "sample_ids": ["wTideSjRFS0", "yaln9y8I7ms"], 
"start_seconds": ["30", "230"], "properties": ["water, running, woman", "female, flushes, toilet"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a toilet flushes and a man speaks"], "question": "which entity has a woman speaking while water is running?", "label": 0}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "paper is crumpling consistently"], "sample_ids": ["zkKdxzNC97Y", "v5cSxLaHADY"], "start_seconds": ["27", "0"], "properties": ["loud, bang, noise", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a door is opened and closed", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["some clanking with distant murmuring", "wind blows as people chatter quietly"], "sample_ids": ["uMTTDZ2mb4", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["clanking, murmuring, distant", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["long loud burping by a man", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["xmiUIOhtZyQ", "uZesmtKZGSw"], "start_seconds": ["60", "250"], "properties": ["loud, burp, man", "men, talk, cars"], "captions_pred_video": ["homer simpson drinking a beer", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a person burps and music plays in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vuUVPzd2FXw", "su6FAOcOA8c"], "start_seconds": ["160", "4"], "properties": ["a, steam, release", "engine, idle, woman"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a woman is speaking and a subway train is moving "], "question": "which entity is a man?", "label": 0}, {"captions": ["multiple motorcycles pass by as a man speaks", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zcDwZ6W7E3E", "tiDFTC-5vU"], "start_seconds": ["180", "30"], "properties": ["man, speak, motorcycles", "male, duck, laugh"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "paper is crumpling consistently"], "sample_ids": ["wwyfGO2J4", "v5cSxLaHADY"], "start_seconds": ["90", "0"], "properties": ["people, applaud, 
hoot", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["w34HjHr6gAY", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["beeps, hit, woman", "stream, water, flow"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "footage is blurry and out of focus"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "people speak as gunfire rings out"], "sample_ids": ["vK93VuO0yNc", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["male voice, bus, rumble", "gunfire, ring, speak"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["birds chirps while a siren 
signals in the distance", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["uKCSGgof8gI", "xKB8O8LTs6s"], "start_seconds": ["12", "70"], "properties": ["chirps, distance, signal", "music, gunfire, explosion"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "an insect buzzes around continuously"], "sample_ids": ["sjlVMgdGSK0", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["accelerates, vehicle, race car", "buzzes, continuously, insect"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a fly is buzzing around a microphone "], "question": "which entity buzzes around continuously", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["u--KhUW8l1Y", "w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["horn, siren, life", "wind, blow, vehicle"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", null], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a man speaks as a car is passing by"], "sample_ids": ["wyllXV6PjKo", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a baby, a woman, a man", "a, car, 
pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a baby?", "label": 0}, {"captions": ["people speak and tapping occurs", "repeated tapping is accompanied by water running and a woman speaking softly"], "sample_ids": ["tFCUUGdREgA", "wvKpEYswXO0"], "start_seconds": ["70", "150"], "properties": ["people, tap, speak", "sound, water, running"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity has a woman speaking softly?", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "an airplane engine runs"], "sample_ids": ["zl9Dqx-j7q4", "yVPZ2MNWpms"], "start_seconds": ["6", "0"], "properties": ["engine, laugh, loud", "engine, airplane, runs"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a jet engine roars ", "a car is driving by on the road "], "question": "which entity is a machine", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["vbr9mHKc8WM", "vlS6YMeWAPo"], "start_seconds": ["40", "40"], 
"properties": ["noise, loudness, engine", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["an engine is idling", "a goat bleats and birds chirp"], "question": "which entity is quieter", "label": 0}, {"captions": ["a man speaks as music plays before artillery is fired", "a woman speaks happily and an animal chirps"], "sample_ids": ["vcmWSmvti8", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["music, man, fire", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a woman is speaking and a dog is barking "], "question": "which entity is more peaceful", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vSeGhaZt-aI", "w5W5Kqtc8E"], "start_seconds": ["50", "100"], "properties": ["water, sink, talk", "wind, blow, vehicle"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about water running down a sink?", "label": 0}, {"captions": ["leaves rustle while man speaks", "plastic is tapped on while someone speaks"], "sample_ids": ["zOZleIRqZm4", "wvKpEYswXO0"], "start_seconds": ["80", "150"], "properties": ["leaves, rustle, speak", "plastic, tap, speak"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["a machine engine runs and a man 
speaks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vs65y4qmyBE", "wqZ135Ssz0"], "start_seconds": ["340", "60"], "properties": ["engine, run, man", "two men, woman, birds"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "an airplane engine spools and people speak"], "sample_ids": ["uEU-Hg5MTN8", "wTjoRj1se3U"], "start_seconds": ["27", "390"], "properties": ["a woman, laughs, animal", "airplane, engine, spool"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a jet engine is running and people are talking"], "question": "which entity is a video of a person speaking and an animal snorting?", "label": 0}, {"captions": ["water splashes and wind noise is made into a microphone", "propeller rearing loudly with some male and female voices interspersed in the background"], "sample_ids": ["sDSppXIlJrs", "vqZuVbG6-HI"], "start_seconds": ["27", "130"], "properties": ["microphone, water, wind", "background, male, female"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "footage is blurry because it's raining outside"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a lawn mower is running and men are speaking "], "question": "which entity has more background noise", "label": 1}, {"captions": ["roadway noise occurs and a truck accelerates", "a car accelerates and wind blows"], "sample_ids": ["tgbONvsP47Y", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["noise, truck, accelerate", "accelerates, 
wind, blows"], "captions_pred_video": ["footage of a fire truck entering a garage", null], "captions_pred_audio": ["a car is driving on the road ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["someone snores nearby", "multiple people speak and children yell while water gurgles"], "sample_ids": ["spJCm8tD9Zo", "vb1fPSDI4c"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "multiple, people, yell"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a sleeping person emits a gravely snore", "three men talk while wind blows and some liquid flows"], "sample_ids": ["w2JXXIAdUdg", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["emits, sleeping, person", "three men, wind, flow"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a person emitting a gravely snore?", "label": 0}, {"captions": ["motors rev and run loudly as a person laughs", "three men talk while wind blows and some liquid flows"], "sample_ids": ["zl9Dqx-j7q4", "vJ7JPEFhyLA"], "start_seconds": ["6", "16"], "properties": ["motors rev, laugh, loudly", "three men, wind, flow"], "captions_pred_video": ["footage of a man driving a car in the dark", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a jet engine roars ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a man speaks with another voice speaking in the 
background", "someone whistles a tune"], "sample_ids": ["u21-Z5gJCB8", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["background, voice, man", "someone, tune, whistle"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a stream runs then someone speaks", "people speak as gunfire rings out"], "sample_ids": ["wbHTKEJZyhc", "wqTCwqVRDlk"], "start_seconds": ["20", "80"], "properties": ["stream, run, someone", "gunfire, ring, speak"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking and a gun is 
fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["slZLHwNbbt4", "w5W5Kqtc8E"], "start_seconds": ["300", "100"], "properties": ["clap, distance, horn", "wind, blow, vehicle"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", null], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vBslzh7saPw", "w5W5Kqtc8E"], "start_seconds": ["90", "100"], "properties": ["engine, roar, louder", "wind, blow, vehicle"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "water splashes and wind noise is made into a microphone"], "sample_ids": ["y4tPJXBKDig", "sDSppXIlJrs"], "start_seconds": ["20", "27"], "properties": ["a, noise, talk", "microphone, water, wind"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "a man is paddling a small wooden boat in the water"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "the wind is blowing and water is splashing"], "question": "which noise is made by water and wind", "label": 1}, {"captions": ["a man is filing a hard object", "a car accelerates and wind blows"], "sample_ids": ["vveS8HT7Uog", "u0TrcHhkPQ"], "start_seconds": ["100", "20"], 
"properties": ["a man, hard, object", "accelerates, wind, blows"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a race car accelerates and revs its engine "], "question": "which object is moving", "label": 1}, {"captions": ["a stream of water runs briefly", "a man speaks as a car is passing by"], "sample_ids": ["x-PeY8Yb8M4", "sK4u5T8hW78"], "start_seconds": ["300", "30"], "properties": ["stream, water, run", "a, car, pass"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "a woman speaks as she rubs two objects together"], "sample_ids": ["viuTg1M-dqg", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["two men, speak, follow", "two objects, woman, speak"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to 
make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is about a woman speaking?", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a child speaks in closed space"], "sample_ids": ["tDVADusiIoc", "yW6FWLSLkx4"], "start_seconds": ["60", "40"], "properties": ["wind, radio, waves", "child, space, speak"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a man speaks as a machine runs", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vD6lYD1l0BY", "uEU-Hg5MTN8"], "start_seconds": ["330", "27"], "properties": ["a, machine, run", "a woman, laughs, animal"], "captions_pred_video": ["game controller being held in the hands of the person", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a woman is speaking and a baby is crying"], "question": "which entity has a man speaking as a machine runs?", "label": 0}, {"captions": 
["a person sniffles and sneezes", "people speak and tapping occurs"], "sample_ids": ["uRlbY6aoBU", "tFCUUGdREgA"], "start_seconds": ["0", "70"], "properties": ["sneezes, sniffles, person", "people, tap, speak"], "captions_pred_video": [null, "a person riding a white horse in an indoor arena"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking and walking with wind noise in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "an infant crying frantically"], "sample_ids": ["vmrxwuAMb2I", "zwOBqeFTgiU"], "start_seconds": ["40", "30"], "properties": ["a dog, inhales, exhales", "cry, infant, frantically"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "of the baby crying in the car seat"], "captions_pred_audio": ["a dog barks and growls", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a horn rings out as a machine runs by"], "sample_ids": ["xKB8O8LTs6s", "slZLHwNbbt4"], "start_seconds": ["70", "300"], "properties": ["music, radio, gunshots", "a, horn, run"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity has a horn", "label": 1}, {"captions": ["a mechanical buzzing getting louder", "paper is crumpling consistently"], "sample_ids": ["sEprKHm8Sj8", "v5cSxLaHADY"], "start_seconds": ["90", "0"], "properties": ["noise, loud, buzzing", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 
2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["birds chirp, a woman speaks, and insects buzz", "a woman speaks happily and an animal chirps"], "sample_ids": ["t97k0cejSQE", "uWAAAL4CIoc"], "start_seconds": ["250", "0"], "properties": ["sound, chirp, buzz", "a woman, chirps, animal"], "captions_pred_video": ["a bee on a purple thistle flower", null], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a woman is speaking and a dog is barking "], "question": "which entity has a higher pitch", "label": 1}, {"captions": ["a person sniffles and then sneezes in the distance", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["uRlbY6aoBU", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["a, distance, sneeze", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is sneezing ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man talks as several small engines run", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["u9A6VZQCZpU", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["a, man, talk", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["loud, continuous 
burping", "people applaud and hoot and chat quietly"], "sample_ids": ["y636gklDioE", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["loud, continuous, burping", "people, applaud, hoot"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", null], "captions_pred_audio": ["a person burps loudly several times", "people are clapping and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a toilet flushes and a female speaks"], "sample_ids": ["y2bVZ7rz-5M", "yaln9y8I7ms"], "start_seconds": ["280", "230"], "properties": ["engine, horn, siren", "female, flushes, toilet"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage is blurry and out of focus"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a toilet flushes and a man speaks"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["s6DESzUTGjY", "wz7N8YRy74I"], "start_seconds": ["16", "30"], "properties": ["wind, laugh, woman", "rooster, crow, background, men"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 
easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["ukxt9I7eMMg", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["continuous, woman, speaking", "men, talk, cars"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has a woman speaking towards the end?", "label": 0}, {"captions": ["a man speaks with another voice speaking in the background", "a horn rings out as a machine runs by"], "sample_ids": ["u21-Z5gJCB8", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["background, voice, man", "a, horn, run"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage of a train coming down the tracks on a sunny day"], 
"captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "paper is crumpling consistently"], "sample_ids": ["y8WEcpOlT3I", "v5cSxLaHADY"], "start_seconds": ["40", "0"], "properties": ["harsh, wind, blows", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vmrxwuAMb2I", "xKB8O8LTs6s"], "start_seconds": ["40", "70"], "properties": ["a dog, inhales, exhales", "music, gunfire, explosion"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a dog barks and growls", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["tEE3MpBt1sg", "wz7N8YRy74I"], "start_seconds": ["50", "30"], "properties": ["drill, something, laugh", "rooster, crow, background, men"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking birds are chirping and a rooster 
is crowing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["xZepNM9qcRA", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["background, motor, run", "sheep, baa, birds"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["vlJS7LN2XyM", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["background, clocks, ticking", "beeps, hit, woman"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a ticktock of a clock", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["paper is crumpling consistently", "an infant crying as a woman laughs"], "sample_ids": ["v5cSxLaHADY", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["paper is crumpling, paper is 
white, paper is crumpling", "a, laugh, infant"], "captions_pred_video": ["footage of the person holding a pair of scissors", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["paper is crumpled and crinkled", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["tapping occurs then a baby cries", "a person is whistling"], "sample_ids": ["wIJK3-5y0kA", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["a, cry, baby", "person, whistling, person"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a person whistling a song"], "question": "which entity is a person?", "label": 1}, {"captions": ["birds chirp and a pop occurs before a man speaks", "a man speaks followed by another man speaking outside"], "sample_ids": ["zuua6-5goWw", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["sound, pop, bird", "two men, speak, follow"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a woman 
talks while something is fried and objects are tapped"], "sample_ids": ["y2ZBGpgbhHM", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["birds, tweet, pant", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds chirping and a dog panting", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a machine runs continuously", "waves crash against a shoreline and people speak"], "sample_ids": ["wdXV3Pv0jiY", "yFB25fqfU8I"], "start_seconds": ["11", "300"], "properties": ["machine, running, continuously", "wave, crash, shoreline"], "captions_pred_video": ["footage is blurry and shaky", "footage of a person surfing in the ocean"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is not a machine?", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "dishes cling together then a man begins to speak"], "sample_ids": ["yZrFNS7GFBQ", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["pigeon, buzzes, insect", "cling, speak, dishes"], "captions_pred_video": ["of the bird in the cage", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["an owl hoots in the background ", "mechanisms are operating and water is splashing "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "a man speaks followed by another man speaking outside"], "sample_ids": ["xvDdE3zNf8Y", "viuTg1M-dqg"], "start_seconds": ["120", "30"], "properties": ["a, female, speaks", "two men, speak, follow"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman speaks and crumples 
paper", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a person speaks briefly", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zOZleIRqZm4", "uYT5gxnyMWM"], "start_seconds": ["80", "50"], "properties": ["person, talk, brief", "female, spraying, scream"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a baby is crying"], "question": "which entity is a person talking?", "label": 0}, {"captions": ["a person snoring", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["t8tv5YRMJUg", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["a person, snore, loud", "engine, laugh, loud"], "captions_pred_video": ["of a man getting his face licked by another man", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ziUT9IFTkjg", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["background, birds, rustling", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "three men talk while wind blows and some liquid flows"], "sample_ids": ["smDKStoHBJo", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], 
"properties": ["a, talk, baby, cry", "three men, wind, flow"], "captions_pred_video": ["a man holding a crying baby in his arms", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["an engine runs and wind blows", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vs65y4qmyBE", "vYkA3cfXp5Q"], "start_seconds": ["340", "30"], "properties": ["engine, run, wind", "engine, accelerate, idle"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "an engine is idling"], "question": "which entity is a vehicle engine?", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["yYEVLuqEytU", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["grunt, slurp, background", "clickety-clack, train, whistle"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "a clock ticktocks"], "sample_ids": ["vVhthZ45k3Y", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["cat, purr, hiss", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage is blurry and out of focus", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a clock is 
ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["bees buzz and wind blows", "a horn rings out as a machine runs by"], "sample_ids": ["tMJne1a4AFI", "slZLHwNbbt4"], "start_seconds": ["0", "300"], "properties": ["bees buzz, wind blows, bees", "a, horn, run"], "captions_pred_video": ["a swarm of bees on the ground", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a swarm of bees buzzing around", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is moving", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "birds chirp and wind blows"], "sample_ids": ["s4Uz1Ffgo04", "sxIvBMSavMQ"], "start_seconds": ["100", "210"], "properties": ["roars, background, people speaking", "birds, chirp, wind"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "birds are chirping and insects are buzzing"], "question": "which entity is quieter", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "a woman talks while something is fried and objects are tapped"], "sample_ids": 
["vmrxwuAMb2I", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["a dog, inhales, exhales", "a woman, something, fried"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a dog barks and growls", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a man talks as several small engines run", "vehicles pass by on a roadway"], "sample_ids": ["u9A6VZQCZpU", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["a, man, talk", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a car is driving on the road "], "question": "which entity is about vehicles?", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "an airplane accelerates briefly"], "sample_ids": ["xjvTpk2Zpr8", "zjTG0gaGCUI"], "start_seconds": ["70", "80"], "properties": ["wind, blows, vehicle", "accelerates, airplane, briefly"], "captions_pred_video": ["footage of a dhl plane landing on the runway", null], "captions_pred_audio": ["a jet engine roars and wind blows ", "a jet engine roars as wind blows "], "question": "which is not a vehicle", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a man speaks as a car is passing by"], "sample_ids": ["vimzuGQvdcU", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a, man, yells", "a, car, pass"], "captions_pred_video": ["a group of people are rafting down a river", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more calm", "label": 1}, {"captions": ["loud, continuous burping", "a person speaks briefly"], "sample_ids": ["y636gklDioE", "zOZleIRqZm4"], "start_seconds": ["20", "80"], "properties": ["loud, continuous, burping", "person, talk, brief"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a person burps loudly several times", "a man is speaking with crickets chirping in the background"], "question": "which entity is talking", "label": 1}, {"captions": ["ticking continues without interruption", "a train horn blows as it passes by"], "sample_ids": ["v-g-j2uTByM", "zVacuqSb4LI"], "start_seconds": ["30", "30"], "properties": ["ticking, continuous, clock", "horn, blows, train"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a clock is ticking loudly", "a 
train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is not continuous", "label": 1}, {"captions": ["a person is whistling", "a man speaks as a motor runs in the background"], "sample_ids": ["sIXTftIuUgw", "xZepNM9qcRA"], "start_seconds": ["90", "30"], "properties": ["person, whistling, person", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a person whistling a song", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a low rumbling in the distance followed by a motorcycle engine revving up", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vr8ZXjEBhMQ", "su6FAOcOA8c"], "start_seconds": ["150", "4"], "properties": ["sound, distance, engine", "engine, idle, woman"], "captions_pred_video": ["is taken from a motorcycle's point of view", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["weDbePuc-Xc", "yDoT73BWsdA"], "start_seconds": ["40", "10"], "properties": ["cartoon character, music, vocalize", "engine, revs, vehicle"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["people clap and speak in the distance", "motors 
rev and run loudly as a person laughs"], "sample_ids": ["wwyfGO2J4", "zl9Dqx-j7q4"], "start_seconds": ["90", "6"], "properties": ["clap, distance, speak", "motors rev, laugh, loudly"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "water flows as men speak and yell"], "sample_ids": ["w2JXXIAdUdg", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["emits, sleeping, person", "water, flow, men"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["sQwlkXjQabo", "yDoT73BWsdA"], "start_seconds": ["10", "10"], "properties": ["water, spray, surface", "engine, revs, vehicle"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["spraying followed by silence", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["water bubbles and gurgles.", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["tB7hWb9gTuQ", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["bubbles, gurgles, water", "music, gunfire, explosion"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "in your own words a screenshot of the game's loading screen"], 
"captions_pred_audio": ["water is splashing and gurgling", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a man speaks followed by another man speaking outside"], "sample_ids": ["ylpYOorfH4o", "viuTg1M-dqg"], "start_seconds": ["410", "30"], "properties": ["engine, run, loud", "two men, speak, follow"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker?", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a woman speaks as she rubs two objects together"], "sample_ids": ["wz7N8YRy74I", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["rooster, crow, background, people", "two objects, woman, speak"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a toilet flushes and water drains", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["sfAvvZwdLCY", "w5W5Kqtc8E"], "start_seconds": ["20", "100"], "properties": ["water drains, flushes, water", "wind, blow, vehicle"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a train engine runs and a horn blows", "wind blows as people chatter quietly"], "sample_ids": ["zPX9o1uDiI", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["engine, horn, run", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a man speaks and a rooster crows while men talk in the 
background"], "sample_ids": ["vdoxuJn9lTc", "wz7N8YRy74I"], "start_seconds": ["40", "30"], "properties": ["burp, loud, girl", "rooster, crow, background, men"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "birds chirp and objects are moved around"], "sample_ids": ["zofjfKhqLk8", "yPUYU6t3rwo"], "start_seconds": ["10", "370"], "properties": ["noise, stop, motor", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "people cheer as a vehicle engine revs"], "sample_ids": ["u21-Z5gJCB8", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["background, voice, man", "engine revs, vehicle, people"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a truck is revving its engine and a man is speaking "], "question": "which entity has more people", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "multiple motorcycles pass by as a man speaks"], "sample_ids": ["w6RTHR6AeAg", "zcDwZ6W7E3E"], "start_seconds": ["40", "180"], "properties": ["call, owl, screech", "man, speak, motorcycles"], 
"captions_pred_video": [null, "2 people riding motorcycles down a mountain road with trees lining the sides of the road"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a man is speaking while a car accelerates and revs its engine "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["yZrFNS7GFBQ", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["pigeon, buzzes, insect", "two men, woman, birds"], "captions_pred_video": ["of the bird in the cage", null], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is talking", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a chime of a clock followed by various tones of ticking with come clinking"], "sample_ids": ["siJFXfGWgDk", "uqFtmnhuqA8"], "start_seconds": ["50", "30"], "properties": ["man, woman, vehicle", "a, b, c"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "shows a clock on the wall of a room with a person standing in front of it"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "mechanisms are ticking and a hammer is striking "], "question": "which entity is a clock?", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "vehicles pass by on a roadway"], "sample_ids": ["sLUnaPT5gM8", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["loud, laughter, intermittent", "pass, vehicle, roadway"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a car is driving on the road "], "question": "which 
entity is moving", "label": 1}, {"captions": ["a jet engine spools up and takes off", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vBslzh7saPw", "yDoT73BWsdA"], "start_seconds": ["90", "10"], "properties": ["engine, spools, takes", "engine, revs, vehicle"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a race car accelerates and revs its engine "], "question": "which engine is revving", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["yPUYU6t3rwo", "tw76HGONaKg"], "start_seconds": ["370", "570"], "properties": ["birds chirp, objects are moved around, birds", "A, game, keyboard"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["insects buzz and a man speaks", "a man speaks and types on a computer keyboard "], "question": "which entity is a video of a man playing a video game?", "label": 1}, {"captions": ["goats bleat 
and metal clings", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["tH17JPjDPnc", "xKB8O8LTs6s"], "start_seconds": ["260", "70"], "properties": ["bleat, metal, clings", "music, gunfire, explosion"], "captions_pred_video": ["feed of the goats eating hay in the barn", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["zVacuqSb4LI", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["blares, fades, train", "A, game, keyboard"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time 
walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a man speaks and types on a computer keyboard "], "question": "which entity is a video of a person playing a video game?", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["tQWGZLItBXk", "tDlysoZiA1I"], "start_seconds": ["170", "0"], "properties": ["voice, music, whoosh", "animal, grunts, chirps"], "captions_pred_video": ["worms revolution screenshots", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "birds are chirping and a rooster is crowing "], "question": "which entity is more animal-like", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "an infant crying as a woman laughs"], "sample_ids": ["sU53zg9Jp7s", "xhmRY9yhC7c"], "start_seconds": ["380", "20"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "a, laugh, infant"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "multiple people 
speak and children yell while water gurgles"], "sample_ids": ["shmR4OZtzqA", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["man, engine, idle", "multiple, people, yell"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", null], "captions_pred_audio": ["a man speaks while a motor runs", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["sawing of wood and rustling with leaves blowing in the distance", "a stream of water runs briefly"], "sample_ids": ["uiItxDsDMFI", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["sound, distance, leaves", "stream, water, run"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a saw is being used with background noise ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "a woman speaks as she rubs two objects together"], "sample_ids": ["u6jIvCtKarQ", "vzxHnu-SFEw"], "start_seconds": ["70", "80"], "properties": ["a, man, speaks", "two objects, woman, speak"], "captions_pred_video": ["footage of a person using a blender on a stove top", "how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a man speaking over glass clinking?", "label": 0}, {"captions": ["water pouring and bubbling", "paper is crumpling consistently"], "sample_ids": ["uyRfq-jKPpo", "v5cSxLaHADY"], "start_seconds": ["50", "0"], "properties": ["water, bubbles, pouring", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["water is running from a faucet", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a car speeding up in the distance"], "sample_ids": ["un9VQlzgZM", "u0TrcHhkPQ"], "start_seconds": ["5", "20"], "properties": ["wind, speak, laugh", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["an airplane accelerates briefly", "paper is crumpling consistently"], "sample_ids": ["zjTG0gaGCUI", "v5cSxLaHADY"], "start_seconds": ["80", "0"], "properties": ["accelerates, airplane, briefly", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a jet engine roars as wind blows ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a woman talking as an infant is crying", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tMbMDvT50j8", "zl9Dqx-j7q4"], "start_seconds": ["12", "6"], "properties": ["a, talk, infant", "engine, laugh, loud"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a baby cries and a woman speaks", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["an airplane engine runs", "people cheer as a vehicle engine revs"], "sample_ids": ["yVPZ2MNWpms", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], 
"properties": ["engine, airplane, runs", "engine revs, vehicle, people"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a car is driving by on the road ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zcDwZ6W7E3E", "tdWhHV3X25Q"], "start_seconds": ["180", "60"], "properties": ["a, man, speak", "applause, audience, yells"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a car accelerates and wind blows"], "sample_ids": ["vZAw4apG0Es", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["background, tick, repeat", "accelerates, wind, blows"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tw76HGONaKg", "yajyRTUQk3U"], "start_seconds": ["570", "400"], "properties": ["music, click, man", "a woman, something, fried"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda 
ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a beep repeats multiple times", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["y682ml90jGw", "zl9Dqx-j7q4"], "start_seconds": ["11", "6"], "properties": ["beep, repeat, multiple", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a beeping sound is being made ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "paper is crumpling consistently"], "sample_ids": ["uPDn2BFTHk", "v5cSxLaHADY"], "start_seconds": ["140", "0"], "properties": ["woman, laughs, speaks", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a baby laughs and a woman speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "an infant crying 
as a woman laughs"], "sample_ids": ["uOpoD0gGXcs", "xhmRY9yhC7c"], "start_seconds": ["120", "20"], "properties": ["chirps, woman, bird", "a, laugh, infant"], "captions_pred_video": ["a herd of cows grazing in the field", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "vehicles pass by on a roadway"], "sample_ids": ["vXlk0lIQBFo", "tgbONvsP47Y"], "start_seconds": ["470", "0"], "properties": ["wind, speak, vocalize", "pass, vehicle, roadway"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage of a fire truck entering a garage"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "wind blows as people chatter quietly"], "sample_ids": ["uEU-Hg5MTN8", "xBxDz0CFVn0"], "start_seconds": ["27", "30"], "properties": ["a woman, laughs, animal", "wind, chatter, people"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vXlk0lIQBFo", "uYT5gxnyMWM"], "start_seconds": ["470", "50"], "properties": ["wind, talk, vocalize", "a, scream, girl"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a 
woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "people cheer as a vehicle engine revs"], "sample_ids": ["wqADXCzngMw", "xjhAnI2q6hM"], "start_seconds": ["340", "6"], "properties": ["engine, idle, man", "engine revs, vehicle, people"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle engine?", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a propeller rotates loudly and intensely"], "sample_ids": ["xBxDz0CFVn0", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["stream, water, flow", "loud, intense, propeller"], "captions_pred_video": ["footage is blurry and out of focus", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a helicopter is flying overhead "], "question": "which entity is quieter", "label": 0}, {"captions": ["a baby cries and a woman speaks", "a duck quacks continuously"], "sample_ids": ["tMbMDvT50j8", "vh30P49Po6s"], "start_seconds": ["12", "30"], "properties": ["a, cry, woman", "quacks, continuously, duck"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a baby cries and a woman speaks", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yI-KvObbDoY", "vJ7JPEFhyLA"], 
"start_seconds": ["260", "16"], "properties": ["sound, smack, wind", "three men, wind, flow"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a liquid flowing?", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["w5W5Kqtc8E", "wDVMhEdTiVw"], "start_seconds": ["100", "30"], "properties": ["water, splashes, motorboat", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is about water?", "label": 0}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a car speeding up in the distance"], "sample_ids": ["wRBHTgrbiwg", "u0TrcHhkPQ"], "start_seconds": ["50", "20"], "properties": ["bird, owl, speak", "distance, car, speed"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "an engine idles quietly then gradually becomes louder"], "sample_ids": ["wSVhSdj0F0", "vbr9mHKc8WM"], "start_seconds": ["10", "40"], "properties": ["horn honks, keys jingle, slam", "noise, loudness, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "an engine is idling"], "question": "which entity is quieter", 
"label": 1}, {"captions": ["birds coo incessantly", "a clock ticktocks"], "sample_ids": ["yZrFNS7GFBQ", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["coo, bird, incessant", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of the bird in the cage", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["an owl hoots in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks uses a drill", "an infant crying as a woman laughs"], "sample_ids": ["x5eIC7S0fbg", "xhmRY9yhC7c"], "start_seconds": ["60", "20"], "properties": ["A man is speaking, uses a drill, and is a tool", "a, laugh, infant"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and using a power tool ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["v0x1odnXtP0", "zj2R0XoFr5k"], "start_seconds": ["210", "50"], "properties": ["keyboard, type, computer", "airplane, boy, fly"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman speaks while a helicopter flies overhead "], "question": "which object is moving", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "paper is crumpling consistently"], "sample_ids": ["uRExseg-0XI", "v5cSxLaHADY"], "start_seconds": ["210", "0"], "properties": ["woman, man, water", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["stock footage or video footage of a person 
stirring a pot on a stove with a long-handled wooden spoon", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["xBxDz0CFVn0", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["stream, water, flow", "wind, blow, vehicle"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["goats bleat and metal clings", "a man speaks as a car is passing by"], "sample_ids": ["tH17JPjDPnc", "sK4u5T8hW78"], "start_seconds": ["260", "30"], "properties": ["bleat, metal, clings", "a, car, pass"], "captions_pred_video": ["feed of the goats eating hay in the barn", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a woman speaks happily and an animal chirps"], "sample_ids": ["y2ZBGpgbhHM", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["birds, tweet, pant", "a woman, 
chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds chirping and a dog panting", "a woman is speaking and a dog is barking "], "question": "which entity is more likely to be in a forest", "label": 0}, {"captions": ["birds chirp and a pop occurs before a man speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["zuua6-5goWw", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["sound, pop, bird", "engine, laugh, loud"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["se87d6yxEOA", "uZesmtKZGSw"], "start_seconds": ["10", "250"], "properties": ["run, whistle, pass", "men, talk, cars"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is moving faster", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a child speaks in closed space"], "sample_ids": ["vqZuVbG6-HI", "yW6FWLSLkx4"], "start_seconds": ["130", "40"], "properties": ["background, male, female", "child, space, speak"], "captions_pred_video": ["footage is blurry because it's raining outside", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "small dogs yip and bark sharply"], "sample_ids": ["sapQIQUhFc", "v-wcQf4BDY0"], "start_seconds": ["280", "120"], "properties": ["liquid, flow, distance", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["wvKpEYswXO0", "xjhAnI2q6hM"], "start_seconds": ["150", "6"], "properties": ["plastic, tap, speak", "engine revs, vehicle, people"], "captions_pred_video": ["of the person preparing food in the 
kitchen", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a truck is revving its engine and a man is speaking "], "question": "what is being tapped on?", "label": 0}, {"captions": ["a man speaks then multiple motorcycles pass by", "a woman speaks as she rubs two objects together"], "sample_ids": ["zcDwZ6W7E3E", "vzxHnu-SFEw"], "start_seconds": ["180", "80"], "properties": ["a, man, speak", "two objects, woman, speak"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["multiple birds chirp and an animal grunts", "a woman and man speak while food is frying"], "sample_ids": ["tDlysoZiA1I", "zk-xJGQU8-4"], "start_seconds": ["0", "130"], "properties": ["animal, grunt, 
multiple", "food, man, woman"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking while dishes are clanging and music is playing in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a toilet flushes and a female speaks"], "sample_ids": ["un9VQlzgZM", "yaln9y8I7ms"], "start_seconds": ["5", "230"], "properties": ["wind, speak, laugh", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["someone whistles a tune", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sIXTftIuUgw", "vb1fPSDI4c"], "start_seconds": ["90", "30"], "properties": ["someone, tune, whistle", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person whistling a song", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a dog whimpers and a woman briefly talks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["y1saVTXsKwc", "uZesmtKZGSw"], "start_seconds": ["80", "250"], "properties": ["a, dog, talk", "men, talk, cars"], "captions_pred_video": ["a dog playing with a pink ball", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a dog barks and a man speaks", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about a dog and a woman talking?", "label": 0}, {"captions": ["a motorcycle idles loudly as wind blows", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["v7jJS8aAyA", "tDlysoZiA1I"], "start_seconds": ["10", "0"], "properties": ["wind, blows, loudly", "animal, grunts, chirps"], "captions_pred_video": [null, "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "birds are chirping and a rooster is crowing "], "question": "which entity is quieter", "label": 1}, {"captions": ["paper is crumpling consistently", "a man speaks as a motor runs in the background"], "sample_ids": ["v5cSxLaHADY", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "background, motor, run"], "captions_pred_video": ["footage of the person holding a pair of scissors", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["paper is crumpled and crinkled", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "a car accelerates and wind blows"], "sample_ids": ["y2ZBGpgbhHM", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["dog, chirp, breathe", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds chirping and a dog panting", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["cats meow and then a person 
begins to talk while the cats continue to meow", "a clock ticktocks"], "sample_ids": ["x5cuQjOdM3E", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["cat, talk, meow", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a black background with an airplane flying in the sky", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a cat meows and a woman speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["paper is crumpling consistently", "a woman and man are speaking"], "sample_ids": ["v5cSxLaHADY", "vbpKkWvfOu4"], "start_seconds": ["0", "560"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "two people, speaking, woman, man"], "captions_pred_video": ["footage of the person holding a pair of scissors", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["paper is crumpled and crinkled", "a woman is speaking and a man is speaking"], "question": "which entity is a video of two people speaking?", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vms5XGTDVQc", "vJ7JPEFhyLA"], "start_seconds": ["220", "16"], "properties": ["paper, crumpled, crinkled", "three men, wind, flow"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["paper is crumpled and crinkled", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a video of a man talking while wind blows and some liquid flows?", "label": 0}, {"captions": ["small dogs yip and bark sharply", "a car speeding up in the distance"], 
"sample_ids": ["v-wcQf4BDY0", "u0TrcHhkPQ"], "start_seconds": ["120", "20"], "properties": ["bark, yip, sharply", "distance, car, speed"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", null], "captions_pred_audio": ["a dog barks and growls", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a person speaks over rustling leaves", "some tunes played by whistling"], "sample_ids": ["zOZleIRqZm4", "u6BnG6YZqJ4"], "start_seconds": ["80", "0"], "properties": ["rustling, leaves, person", "tune, play, whistling"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["tDVADusiIoc", "tw76HGONaKg"], "start_seconds": ["60", "570"], "properties": ["man, radio, blows", "A, game, keyboard"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina 
of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man speaks and types on a computer keyboard "], "question": "which man is speaking", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "people applaud and hoot and chat quietly"], "sample_ids": ["su6FAOcOA8c", "wwyfGO2J4"], "start_seconds": ["4", "90"], "properties": ["engine, idle, woman", "people, applaud, hoot"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", null], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a person speaks over rustling leaves", "wind blowing followed by a zoom"], "sample_ids": ["zOZleIRqZm4", "vr8ZXjEBhMQ"], "start_seconds": ["80", "150"], "properties": ["rustling, leaves, person", "wind, blow, zoom"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a dark barks and whimpers", "pigeons vocalize and birds chirp"], "sample_ids": ["sYj4hpDUZDQ", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["barks, whimpers, dark", "vocalize, bird, chirp"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "of the pigeon in the cage"], "captions_pred_audio": ["a dog barks and a cat meows", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a person snores loudly multiple 
times at a close distance"], "sample_ids": ["w2M4i1mklOA", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["alarm, gears, turn", "loud, multiple, distance"], "captions_pred_video": ["footage of an antique clock", null], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "an adult male speaks and dials a rotary phone"], "sample_ids": ["wPz6QRAkEb4", "tK4VlLsNxak"], "start_seconds": ["60", "120"], "properties": ["chirps, tweets, song", "An adult male speaks, dials, and speaks into a rotary phone"], "captions_pred_video": ["a bird in a cage on top of a pole", "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["birds are chirping in the background ", "a man is speaking and using a sewing machine"], "question": "which entity is speaking", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "a child speaks in closed space"], "sample_ids": ["w9lpbUn0hPc", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["male, wind, rustling", "child, space, speak"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["sapQIQUhFc", "uEU-Hg5MTN8"], "start_seconds": ["280", "27"], "properties": ["water, stream, trickles", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask 
and a boy hugging her"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a telephone rings followed by a woman talking"], "sample_ids": ["xjhAnI2q6hM", "tGcFnX0GHI"], "start_seconds": ["6", "0"], "properties": ["engine revs, vehicle, people", "ring, talk, woman"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", null], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a man speaks as a car is passing by"], "sample_ids": ["y2bVZ7rz-5M", "sK4u5T8hW78"], "start_seconds": ["280", "30"], "properties": ["engine, horn, siren", "a, car, pass"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sEprKHm8Sj8", "yajyRTUQk3U"], "start_seconds": ["90", "400"], "properties": ["car, tires, slows", "a woman, something, 
fried"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sHbXC6na9hg", "vYkA3cfXp5Q"], "start_seconds": ["0", "30"], "properties": ["a person, saw, wood", "engine, accelerate, idle"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["an engine is idling and vibrating", "an engine is idling"], "question": "which entity is a machine", "label": 1}, {"captions": ["continuous snoring", "a car accelerates and wind blows"], "sample_ids": ["sLkeqCDJIyw", "u0TrcHhkPQ"], "start_seconds": ["120", "20"], "properties": ["loud, snoring, noise", "accelerates, wind, blows"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", null], "captions_pred_audio": ["a person is snoring loudly", "a race car accelerates and revs its engine "], "question": "which entity is not a noise", "label": 1}, {"captions": ["bees buzz and wind blows", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["tMJne1a4AFI", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["bees buzz, wind blows, bees", "a woman, a television program, a bird"], "captions_pred_video": ["a swarm of bees on the ground", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a swarm of bees buzzing around", "a woman is speaking and a dog is whimpering"], "question": "which entity is a video of a television program?", "label": 1}, {"captions": ["several ducks are quacking and squawking", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wfHeoPDLMaM", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["quacking, squawking, ducks", "multiple, people, yell"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "a crowd of people are talking and laughing"], "question": 
"which entity is more quiet", "label": 0}, {"captions": ["water gurgles, metal squeaks and the water stops", "water splashes and a door squeaks"], "sample_ids": ["x4a9YGIw4ok", "sdXV-ylviw"], "start_seconds": ["120", "190"], "properties": ["water, gurgles, stops", "sound, splash, door"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and water splashes", "a dog barks and taps with background noise "], "question": "which entity has a door squeak?", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "some tunes played by whistling"], "sample_ids": ["spYNpeN7rPY", "u6BnG6YZqJ4"], "start_seconds": ["1", "0"], "properties": ["a clock, ticktock, man", "tune, play, whistling"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a person whistling a song"], 
"question": "which entity is playing tunes", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "a horn rings out as a machine runs by"], "sample_ids": ["x6ijhqRY38s", "slZLHwNbbt4"], "start_seconds": ["250", "300"], "properties": ["something metal, glass, hit", "a, horn, run"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["bees buzz as wind blows", "a man speaks followed by another man speaking outside"], "sample_ids": ["tMJne1a4AFI", "viuTg1M-dqg"], "start_seconds": ["0", "30"], "properties": ["bees, buzz, wind", "two men, speak, follow"], "captions_pred_video": ["a swarm of bees on the ground", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a single speaker?", "label": 0}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "pigeons vocalize and birds chirp"], "sample_ids": ["uEU-Hg5MTN8", "uiS58TNyUiw"], "start_seconds": ["27", "430"], "properties": ["animal, grunts, snorts", "vocalize, bird, chirp"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "of the pigeon in the cage"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["food is frying while a woman speaks", "someone snores nearby"], "sample_ids": ["yhQ2Lg-7qDY", "spJCm8tD9Zo"], "start_seconds": ["130", "90"], "properties": ["food, woman, speak", "someone snores, nearby, someone"], 
"captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a person is snoring loudly"], "question": "what is a person doing in the first picture?", "label": 0}, {"captions": ["ticking continues without interruption", "a stream of water flows as people talk and wind blows"], "sample_ids": ["v-g-j2uTByM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["ticking, continuous, clock", "stream, water, flow"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "footage is blurry and out of focus"], "captions_pred_audio": ["a clock is ticking loudly", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "a person is whistling a tune"], "sample_ids": ["xERFUeZONz8", "scYRUkrFLiQ"], "start_seconds": ["0", "30"], "properties": ["ring, approach, traffic", "a, tune, whistle"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "of the man wearing a bow tie and a suit jacket in front of a red door"], "captions_pred_audio": ["an emergency vehicle siren blares", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "an infant crying as a woman laughs"], "sample_ids": ["rqu8iB22IY", "xhmRY9yhC7c"], "start_seconds": ["5", "20"], "properties": ["sound, repeats, laugh", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a wooden clack accompanies nearby chirping birds"], 
"sample_ids": ["yPUYU6t3rwo", "yeFvk9x0wWI"], "start_seconds": ["370", "30"], "properties": ["birds chirp, objects are moved around, birds", "clack, bird, chirp"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["insects buzz and a man speaks", "birds chirp in the background as a car drives by "], "question": "which entity is about birds?", "label": 0}, {"captions": ["multiple birds vocalize and wind blows", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["uoGVs9yUqY4", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["multiple, vocalize, wind", "applause, audience, yells"], "captions_pred_video": ["for how to make a wooden shed door youtube", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a machine runs continuously", "vehicles pass by on a roadway"], "sample_ids": ["wdXV3Pv0jiY", "tgbONvsP47Y"], "start_seconds": ["11", "0"], "properties": ["machine, running, continuously", "pass, vehicle, roadway"], "captions_pred_video": ["footage is blurry and shaky", "footage of a fire truck entering a garage"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["yeFvk9x0wWI", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["clack, bird, chirp", "airplane, boy, fly"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of a small airplane 
flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "people applaud and hoot and chat quietly"], "sample_ids": ["uqFtmnhuqA8", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["a, b, c", "people, applaud, hoot"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", null], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "wind noise takes place into a microphone while rustling occurs"], "sample_ids": ["yeFvk9x0wWI", "w8uLijTqtlU"], "start_seconds": ["30", "70"], "properties": ["chirp, twitter, clatter", "wind, microphone, noise"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage is blurry and shaky"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "the wind is blowing strongly"], "question": "which noise is not made by birds", "label": 1}, {"captions": ["a child yells and another yells", "a infant makes noise and is excited"], "sample_ids": ["vMDHu7Lxcgw", "wIJK3-5y0kA"], "start_seconds": ["410", "30"], "properties": ["two, yell, child", "noise, excited, infant"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a baby cries and a woman speaks"], "question": "which entity is more excited", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "a man speaks followed by another man speaking 
outside"], "sample_ids": ["y8WEcpOlT3I", "viuTg1M-dqg"], "start_seconds": ["40", "30"], "properties": ["wind, speak, buffeting", "two men, speak, follow"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two men speaking?", "label": 0}, {"captions": ["white noise and birds chirping", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["wRBHTgrbiwg", "zl9Dqx-j7q4"], "start_seconds": ["50", "6"], "properties": ["noise, white, chirping", "engine, laugh, loud"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a stream of water runs briefly", "wind blows as people chatter quietly"], "sample_ids": ["x-PeY8Yb8M4", "xBxDz0CFVn0"], "start_seconds": ["300", "30"], "properties": ["stream, water, run", "wind, chatter, people"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "footage is blurry and out of focus"], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["zuua6-5goWw", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["birds, chirp, quiet, man, speaks", "stream, water, flow"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks as crickets sing", "a man speaks, then dials a rotary telephone"], "sample_ids": ["ryFDPxgDOGc", "tK4VlLsNxak"], "start_seconds": ["570", "120"], "properties": ["a, crickets, sing", "a, dial, telephone"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking and using a sewing machine"], "question": "which entity is a man speaking to a rotary telephone?", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "a stream of water runs briefly"], "sample_ids": ["yeFvk9x0wWI", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["clack, bird, chirp", "stream, water, run"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["water running down a sink while a man is talking", "paper is crumpling consistently"], "sample_ids": ["vSeGhaZt-aI", "v5cSxLaHADY"], "start_seconds": ["50", "0"], 
"properties": ["water, sink, talk", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wztCSUxOf8", "vJ7JPEFhyLA"], "start_seconds": ["130", "16"], "properties": ["a crowd, yells, applauds", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 0}, {"captions": ["people speak and laugh as a child speaks", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["sa6TLVbooCc", "vlS6YMeWAPo"], "start_seconds": ["240", "40"], "properties": ["people, laugh, child", "sheep, baa, birds"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yajyRTUQk3U", "yajyRTUQk3U"], "start_seconds": ["400", "400"], "properties": ["noise, woman, speak", "a woman, something, fried"], "captions_pred_video": ["- a woman cooking in the kitchen", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a woman is speaking while food 
is frying in the background"], "question": "which woman is speaking over sizzling noise", "label": 0}, {"captions": ["water runs into a sink while men speak", "a man is filing a hard object"], "sample_ids": ["vzceMbklWc", "vveS8HT7Uog"], "start_seconds": ["180", "100"], "properties": ["water, sink, run", "a man, hard, object"], "captions_pred_video": [null, "footage is of a workbench with various tools on it including a hammer and a screwdriver"], "captions_pred_audio": ["water is running and a man is speaking", "a man is filing and speaking with background noise and breathing "], "question": "which object is harder to file", "label": 0}, {"captions": ["a propeller moves loudly nearby", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["ugHJF0hfYkg", "vfYTJq7nU"], "start_seconds": ["10", "130"], "properties": ["loud, propeller, move", "rustling, ducks, quack"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a duck quacks and a woman speaks"], "question": "which entity is quieter", "label": 1}, {"captions": ["a toilet flushes and water drains", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sfAvvZwdLCY", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["water drains, flushes, water", "female, spraying, scream"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "loud, continuous burping"], "sample_ids": ["x5cuQjOdM3E", "y636gklDioE"], "start_seconds": ["30", "20"], "properties": ["cat, meows, young woman", "loud, continuous, burping"], "captions_pred_video": ["a 
black background with an airplane flying in the sky", "a dog sitting on a red chair in front of an old telephone"], "captions_pred_audio": ["a cat meows and a woman speaks", "a person burps loudly several times"], "question": "which is louder", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "an engine runs loudly"], "sample_ids": ["sOa7g-44Dag", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["audio, scratching, man", "loud, engine, run"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "some tunes played by whistling"], "sample_ids": ["soTOh3zYJfY", "u6BnG6YZqJ4"], "start_seconds": ["40", "0"], "properties": ["vehicle, skid, tires", "tune, play, whistling"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "pigeons vocalize and birds chirp"], "sample_ids": ["vms5XGTDVQc", "uiS58TNyUiw"], "start_seconds": ["220", "430"], "properties": ["paper, crumpled, crinkled", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "of the pigeon in the cage"], "captions_pred_audio": ["paper is crumpled and crinkled", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "some tunes played by whistling"], 
"sample_ids": ["xl2PIWyXaM", "u6BnG6YZqJ4"], "start_seconds": ["160", "0"], "properties": ["chirp, man, younger person", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["birds are chirping and people are talking", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a machine beeps continuously", "people cheer as a vehicle engine revs"], "sample_ids": ["y682ml90jGw", "xjhAnI2q6hM"], "start_seconds": ["11", "6"], "properties": ["beeps, machine, continuously", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a beeping sound is being made ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "some tunes played by whistling"], "sample_ids": ["sxYkFKFIZD0", "u6BnG6YZqJ4"], "start_seconds": ["20", "0"], "properties": ["screech, man, door", "tune, play, whistling"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a telephone rings followed by a woman talking"], "sample_ids": ["su6FAOcOA8c", 
"tGcFnX0GHI"], "start_seconds": ["4", "0"], "properties": ["engine, idle, woman", "ring, talk, woman"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", null], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a recording", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["vZAw4apG0Es", "wSVhSdj0F0"], "start_seconds": ["30", "10"], "properties": ["background, tick, repeat", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a car horn honks and keys jangle with background noise "], "question": "which entity has a horn honk?", "label": 1}, {"captions": ["an audience gives applause", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["x6iCUDmRpKQ", "xKB8O8LTs6s"], "start_seconds": ["38", "70"], "properties": ["applause, audience, give", "music, gunfire, explosion"], "captions_pred_video": ["a black background with the moon and stars in the sky", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a group of people are clapping and cheering", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["x5cuQjOdM3E", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["cat, meows, young woman", "loud, multiple, distance"], "captions_pred_video": ["a black background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "a person snoring 
loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["water flows followed by women screaming", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["w5W5Kqtc8E", "zl9Dqx-j7q4"], "start_seconds": ["100", "6"], "properties": ["water, flow, women", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a jet engine roars "], "question": "which entity is followed by laughter", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vf9xf3vMsGM", "vb1fPSDI4c"], "start_seconds": ["540", "30"], "properties": ["A man speaks while turning a water faucet on.", "multiple, people, yell"], "captions_pred_video": ["of the person washing their hands under the faucet", null], "captions_pred_audio": ["a man is speaking while water is running in the background", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "a vehicle engine accelerates and wind blows"], "sample_ids": ["xl2PIWyXaM", "wudZTNBtVqc"], "start_seconds": ["160", "60"], "properties": ["chirp, man, younger person", "accelerates, engine, wind"], "captions_pred_video": [null, "footage is of a parking lot with cars parked in it"], "captions_pred_audio": ["birds are chirping and people are talking", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "water flows as men speak and yell"], "sample_ids": ["wTideSjRFS0", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["food, sizzle, woman", "water, flow, men"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a man in a 
red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing?", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "roadway noise occurs and a truck accelerates"], "sample_ids": ["wnpJndXuxLc", "tgbONvsP47Y"], "start_seconds": ["50", "0"], "properties": ["blows, vehicle, train", "noise, truck, accelerate"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a baby cries and a woman moans", "a stream of water runs briefly"], "sample_ids": ["smDKStoHBJo", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["a, cry, woman", "stream, water, run"], "captions_pred_video": ["a man holding a crying baby in his arms", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "some men converse over an engine running"], "sample_ids": ["uWAAAL4CIoc", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["a woman, chirps, animal", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a woman speaking and an animal chirps?", "label": 0}, {"captions": ["three men talk while wind blows and some liquid flows", "winds blows roughly as a vehicle races past"], 
"sample_ids": ["vJ7JPEFhyLA", "xjvTpk2Zpr8"], "start_seconds": ["16", "70"], "properties": ["three men, wind, flow", "wind, blows, vehicle"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a jet engine roars and wind blows "], "question": "which entity shows a vehicle racing past?", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "an engine runs loudly"], "sample_ids": ["y2bVZ7rz-5M", "vqZuVbG6-HI"], "start_seconds": ["280", "130"], "properties": ["engine, horn, siren", "loud, engine, run"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a lawn mower is running and men are speaking "], "question": "which entity has a louder engine", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "water is sprayed across a hard surface"], "sample_ids": ["sEprKHm8Sj8", "sQwlkXjQabo"], "start_seconds": ["90", "10"], "properties": ["car, tires, slows", "water, spray, surface"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "spraying followed by silence"], "question": "which is a liquid", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "water is sprayed across a hard surface"], "sample_ids": ["tK4VlLsNxak", "sQwlkXjQabo"], "start_seconds": ["120", "10"], "properties": 
["An adult male speaks, dials, and speaks into a rotary phone", "water, spray, surface"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "wind blows as people chatter quietly"], "sample_ids": ["u5RmF3c3Aw", "xBxDz0CFVn0"], "start_seconds": ["60", "30"], "properties": ["engine, car, zoom", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "small dogs yip and bark sharply"], "sample_ids": ["wqZ135Ssz0", "v-wcQf4BDY0"], "start_seconds": ["60", "120"], "properties": ["two men, woman, birds", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a person is whistling", "waves crash against a shoreline and people speak"], "sample_ids": ["sIXTftIuUgw", "yFB25fqfU8I"], "start_seconds": ["90", "300"], "properties": ["person, whistling, person", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a person whistling a song", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more calm", "label": 0}, {"captions": ["a young woman speaks 
and laughs and an animal snorts", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["uEU-Hg5MTN8", "tDVADusiIoc"], "start_seconds": ["27", "60"], "properties": ["a woman, laughs, animal", "water, radio, man"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a man speaking with light rustling", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zOZleIRqZm4", "uEU-Hg5MTN8"], "start_seconds": ["80", "27"], "properties": ["light, rustling, man", "a woman, laughs, animal"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a recording", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "an infant crying frantically"], "sample_ids": ["xfaoyyzw2WU", "zwOBqeFTgiU"], "start_seconds": ["180", "30"], "properties": ["loud, jet engine, roar", "cry, infant, frantically"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "of the baby crying in the car seat"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a baby cries loudly"], "question": "which is louder", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "birds chirp and objects are moved around"], "sample_ids": ["zkKdxzNC97Y", "yPUYU6t3rwo"], "start_seconds": ["27", "370"], "properties": ["loud, bang, noise", "birds chirp, objects are moved around, birds"], 
"captions_pred_video": ["footage of the door opening and closing in slow motion", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a door is opened and closed", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person speaks briefly", "people applaud and hoot and chat quietly"], "sample_ids": ["zOZleIRqZm4", "wwyfGO2J4"], "start_seconds": ["80", "90"], "properties": ["person, talk, brief", "people, applaud, hoot"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "a propeller rotates loudly and intensely"], "sample_ids": ["uiItxDsDMFI", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["wood, piece, saw", "loud, intense, propeller"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a saw is being used with background noise ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a helicopter engine runs", "a car speeding up in the distance"], "sample_ids": ["t5ZbXbniOWk", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["engine, helicopter, run", "distance, car, speed"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["an audience gives applause as a man yells and a group sings", "a woman talks while something is fried and objects are tapped"], 
"sample_ids": ["tdWhHV3X25Q", "yajyRTUQk3U"], "start_seconds": ["60", "400"], "properties": ["applause, audience, yells", "a woman, something, fried"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a woman is speaking while food is frying in the background"], "question": "which entity is a demonstration", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sWZzXuWYY", "vfYTJq7nU"], "start_seconds": ["420", "130"], "properties": ["male, clanks, thumps", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a duck quacks and a woman speaks"], "question": "which entity is about a machine?", "label": 0}, {"captions": ["a woman speaks and then a man speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vbpKkWvfOu4", "tDVADusiIoc"], "start_seconds": ["560", "60"], "properties": ["a, man, speaks", "water, radio, man"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yRx9txMcBl0", "uEU-Hg5MTN8"], "start_seconds": ["40", 
"27"], "properties": ["accelerates, tires, squeals", "a woman, laughs, animal"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "a drill runs and two people laugh"], "sample_ids": ["zdYdyF9-m8U", "tEE3MpBt1sg"], "start_seconds": ["7", "50"], "properties": ["wind, crash, shoreline", "two people, laugh, drill"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["waves crash and wind blows ", "people are laughing breathing and speaking with background noise "], "question": "which entity is stationary", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a race car approaches quickly and slows down squealing tires"], "sample_ids": ["zk-xJGQU8-4", "sEprKHm8Sj8"], "start_seconds": ["130", "90"], "properties": ["food, man, woman", "car, tires, slows"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 
2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "an airplane engine spools and people speak"], "sample_ids": ["vbZ-0lGPneg", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["a woman, a television program, a bird", "airplane, engine, spool"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a jet engine is running and people are talking"], "question": "which entity has a bird in it?", "label": 0}, {"captions": ["heavy rain splashes as it falls", "a man speaks followed by another man speaking outside"], "sample_ids": ["wP8ZKrlx3oA", "viuTg1M-dqg"], "start_seconds": ["40", "30"], "properties": ["fall, rain, splash", "two men, speak, follow"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vYkA3cfXp5Q", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["speed, idle, accelerate", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["an engine is idling", "a baby is laughing and breathing 
while a man is speaking "], "question": "which entity is more like a person", "label": 1}, {"captions": ["a person is burping while a girl speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["vdoxuJn9lTc", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["person, burp, girl", "people, applaud, hoot"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", null], "captions_pred_audio": ["a child speaks followed by a burp", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be at a party", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "small dogs yip and bark sharply"], "sample_ids": ["sapQIQUhFc", "v-wcQf4BDY0"], "start_seconds": ["280", "120"], "properties": ["water, trickles, flow", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "a dark barks and whimpers"], "sample_ids": ["zF8yoL0rkbI", "sYj4hpDUZDQ"], "start_seconds": ["30", "30"], "properties": ["engine, run, someone", "barks, whimpers, dark"], "captions_pred_video": ["footage of the traffic on the street at night", "a brown and white dog standing in front of a wall with its mouth open"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a dog barks and a cat meows"], "question": "which entity is more quiet", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "a child speaks in closed space"], "sample_ids": ["uC9dtII1KDI", "yW6FWLSLkx4"], "start_seconds": ["150", "40"], "properties": ["wind, gusts, distance", "child, space, speak"], "captions_pred_video": ["footage of a person riding a horse in a 
riding arena", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a woman and man are speaking", "water flows as men speak and yell"], "sample_ids": ["vbpKkWvfOu4", "vJ7JPEFhyLA"], "start_seconds": ["560", "16"], "properties": ["two people, speaking, woman, man", "water, flow, men"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a man and a woman speaking?", "label": 0}, {"captions": ["a man speaks and is typing on a keyboard", "paper folding and crinkling"], "sample_ids": ["x9JovgqUcs", "zPpG3RD8lSs"], "start_seconds": ["500", "20"], "properties": ["a, man, speaks, keyboard", "paper, fold, crinkle"], "captions_pred_video": [null, "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out 
of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man speaks and types on a keyboard", "the wind blows and a mouse clicks "], "question": "which is not a person", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "someone is typing on a computer keyboard"], "sample_ids": ["rqu8iB22IY", "v0x1odnXtP0"], "start_seconds": ["5", "210"], "properties": ["sound, repeats, laugh", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a man woman speak while crickets sing", "people speak as gunfire rings out"], "sample_ids": ["zTLVJCo4WEE", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["a, crickets, sing", "gunfire, ring, speak"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a train horn blows as it passes by", "an airplane engine runs"], "sample_ids": ["zVacuqSb4LI", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["horn, blows, train", "engine, airplane, runs"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a car is driving by on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a stream of water runs briefly", "a man speaks as horns blow"], "sample_ids": ["x-PeY8Yb8M4", "tHyNqRyK34A"], "start_seconds": ["300", "24"], "properties": ["stream, water, run", "a, man, speaks"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "being taken from inside a vehicle on the street at night"], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking and a car is honking with background noise "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["an airplane engine spools and people speak", "a car speeding up in the distance"], "sample_ids": ["wTjoRj1se3U", "u0TrcHhkPQ"], "start_seconds": ["390", "20"], "properties": ["airplane, engine, spool", "distance, car, speed"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", null], "captions_pred_audio": ["a jet engine is running and people are talking", "a race car accelerates and revs its engine "], "question": "which object is moving faster", "label": 0}, {"captions": ["male speech with light ticking", "a woman speaks as she rubs two objects together"], "sample_ids": ["xO-Q2BlIIPU", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["male, speech, ticking", "two objects, woman, speak"], "captions_pred_video": ["a clock with a green glowing display 
showing the time 09 07 2016 12 31 2016", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a woman speaking?", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zcDwZ6W7E3E", "vb1fPSDI4c"], "start_seconds": ["180", "30"], "properties": ["man, speak, motorcycles", "multiple, people, yell"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["w2bYrCVLT60", "yajyRTUQk3U"], "start_seconds": ["120", "400"], "properties": ["ducks, speak, quack", "a 
woman, something, fried"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", "- a woman cooking in the kitchen"], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "an airplane engine spools and people speak"], "sample_ids": ["sG7TyPnFDR0", "wTjoRj1se3U"], "start_seconds": ["180", "390"], "properties": ["beeps, machine, smoke alarm", "airplane, engine, spool"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a jet engine is running and people are talking"], "question": "which entity is a machine?", "label": 0}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "water splashes as an animal walks through"], "sample_ids": ["wP8ZKrlx3oA", "w1ir-sZ3Im8"], "start_seconds": ["40", "90"], "properties": ["rain, storm, thunder", "animal, water, splashes"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a heavy rain is falling on a surface", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be in a storm", "label": 0}, {"captions": ["an engine runs and wind blows", "a machine beeps continuously"], "sample_ids": ["vs65y4qmyBE", "y682ml90jGw"], "start_seconds": ["340", "11"], "properties": ["engine, run, wind", "beeps, machine, continuously"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a 
beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["xKB8O8LTs6s", "uZesmtKZGSw"], "start_seconds": ["70", "250"], "properties": ["music, radio, gunshots", "men, talk, cars"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "someone whistles a tune"], "sample_ids": ["uEU-Hg5MTN8", "sIXTftIuUgw"], "start_seconds": ["27", "90"], "properties": ["a woman, laughs, animal", "someone, tune, whistle"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["people speak softly as food sizzles", "metal clacking as food and oil sizzles followed by a woman talking"], "sample_ids": ["yhQ2Lg-7qDY", "vW4x7S1VfQc"], "start_seconds": ["130", "150"], "properties": ["food, sizzle, speak", "clacking, oil, woman"], 
"captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage of a person cooking fish in a frying pan on a stove top"], "captions_pred_audio": ["a faucet is running and a man is speaking", "food sizzles in a frying pan"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["zofjfKhqLk8", "ukg5L09Wpvo"], "start_seconds": ["10", "150"], "properties": ["noise, stop, motor", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a man talks while a clock does ticktock"], "sample_ids": ["sK4u5T8hW78", "spYNpeN7rPY"], "start_seconds": ["30", "1"], "properties": ["a, car, pass", "a clock, ticktock, man"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and breathing with background noise "], "question": "which entity is a clock?", "label": 1}, {"captions": ["a person snoring", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["t8tv5YRMJUg", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["a person, snore, loud", "People, motor, brakes"], "captions_pred_video": ["of a man getting his face licked by another man", null], "captions_pred_audio": ["a person sniffs and breathes heavily", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["xvDdE3zNf8Y", "tdWhHV3X25Q"], "start_seconds": ["120", "60"], "properties": ["a, female, speaks", "applause, audience, yells"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a 
purple tie", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "people applaud and hoot and chat quietly"], "sample_ids": ["xyL9F5VrjkE", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["wind, blows, vehicle", "people, applaud, hoot"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be in a theater", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vbpKkWvfOu4", "uZesmtKZGSw"], "start_seconds": ["560", "250"], "properties": ["a, man, speaks", "men, talk, cars"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking and a car is revving with laughter in the 
background "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a person is snoring while sleeping"], "sample_ids": ["yLy-WycbVVE", "vJrjSeP17yE"], "start_seconds": ["30", "40"], "properties": ["background, people, talk", "a person is sleeping, snoring, person"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a person snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["tQWGZLItBXk", "uEU-Hg5MTN8"], "start_seconds": ["170", "27"], "properties": ["music, person, ding", "a woman, laughs, animal"], "captions_pred_video": ["worms revolution screenshots", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman is speaking and a baby is crying"], "question": "which entity has a person speaking?", "label": 0}, {"captions": ["a woman speaks as frying food sizzles", "a man speaks followed by another man speaking outside"], "sample_ids": ["wTideSjRFS0", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["food, sizzle, woman", "two men, speak, follow"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two men speaking?", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell 
dings", "material crumbles into a microphone"], "sample_ids": ["t69a8aRKhmc", "vofpvUo6NAw"], "start_seconds": ["30", "220"], "properties": ["a, b, c", "material, crumbles, microphone"], "captions_pred_video": ["footage is blurry and out of focus", "person wrapping a toy car in a plastic bag"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "paper is being crumpled and crinkled"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "dishes cling together then a man begins to speak"], "sample_ids": ["w8uLijTqtlU", "sQGXqGcwOTc"], "start_seconds": ["70", "3"], "properties": ["wind, microphone, noise", "cling, speak, dishes"], "captions_pred_video": ["footage is blurry and shaky", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["the wind is blowing strongly", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vXlk0lIQBFo", "xKB8O8LTs6s"], "start_seconds": ["470", "70"], "properties": ["wind, speak, vocalize", "music, gunfire, explosion"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "dishes cling together then a man begins to speak"], "sample_ids": ["ylpYOorfH4o", "sQGXqGcwOTc"], "start_seconds": ["410", "3"], "properties": ["engine, running, wind", "cling, speak, dishes"], "captions_pred_video": ["for youtube how to replace 
the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking and an engine is revving", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["plastic is tapped on while someone speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["wvKpEYswXO0", "zl9Dqx-j7q4"], "start_seconds": ["150", "6"], "properties": ["plastic, tap, speak", "engine, laugh, loud"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a door slams shut roughly"], "sample_ids": ["w5W5Kqtc8E", "zkKdxzNC97Y"], "start_seconds": ["100", "27"], "properties": ["wind, engine, scream", "a door, slams, shut"], "captions_pred_video": [null, "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a door is opened and closed"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "birds chirp and objects are moved around"], "sample_ids": 
["wztCSUxOf8", "yPUYU6t3rwo"], "start_seconds": ["130", "370"], "properties": ["a crowd, yells, applauds", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 0}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "someone is typing on a computer keyboard"], "sample_ids": ["sG7TyPnFDR0", "v0x1odnXtP0"], "start_seconds": ["180", "210"], "properties": ["beeps, machine, smoke alarm", "keyboard, type, computer"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a person is typing on a keyboard"], "question": "which entity is typing on a computer keyboard?", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "people applaud and hoot and chat quietly"], "sample_ids": ["sAam2NqGhLY", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["snoring, breathing, child", "people, applaud, hoot"], "captions_pred_video": ["of a little girl sleeping on a couch", null], "captions_pred_audio": ["a person is snoring", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "some men converse over an engine running"], "sample_ids": ["u5RmF3c3Aw", "sCiy7QS1U"], "start_seconds": ["60", "300"], "properties": ["engine, car, zoom", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video of a car zooming by?", 
"label": 0}, {"captions": ["a man yells and speaks as water splashes", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vimzuGQvdcU", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["a, man, yells", "wind, blow, vehicle"], "captions_pred_video": ["a group of people are rafting down a river", null], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a man yelling?", "label": 0}, {"captions": ["continuous chugging with birds chirping in the background", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["xM4joTqDVp4", "uYT5gxnyMWM"], "start_seconds": ["160", "50"], "properties": ["background, chirp, birds", "a, scream, girl"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream in the background", "label": 1}, {"captions": ["water pouring and bubbling", "an infant crying as a woman laughs"], "sample_ids": ["uyRfq-jKPpo", "xhmRY9yhC7c"], "start_seconds": ["50", "20"], "properties": ["water, bubbles, pouring", "a, laugh, infant"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["water is running from a faucet", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sWZzXuWYY", "wz7N8YRy74I"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "a car speeding up in the distance"], "sample_ids": ["wSVhSdj0F0", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["beep, clang, footsteps", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "a clock ticktocks"], "sample_ids": ["voJh2gJxXhA", "v-g-j2uTByM"], "start_seconds": ["50", "30"], "properties": ["music, frog, croak", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a clock is ticking loudly"], "question": 
"which entity is silent", "label": 1}, {"captions": ["people speak then an engine runs", "a jet engine spools up and takes off"], "sample_ids": ["uMTTDZ2mb4", "vBslzh7saPw"], "start_seconds": ["30", "90"], "properties": ["engine, run, people", "engine, spools, takes"], "captions_pred_video": [null, "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a jet engine roars and accelerates "], "question": "which entity is a video of an engine running?", "label": 0}, {"captions": ["a fly buzzes around loudly as birds chirp", "birds vocalize and chirp continuously"], "sample_ids": ["uJV8NDaHqqk", "w1mlz3Pe4fU"], "start_seconds": ["100", "300"], "properties": ["loud, fly, chirp", "vocalize, chirp, continuously"], "captions_pred_video": ["a bee hive in a wooden box", "of a bird in a cage"], "captions_pred_audio": ["a swarm of bees buzzing around", "birds are chirping and singing"], "question": "which entity is quieter", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "frogs croak and vocalize"], "sample_ids": ["weDbePuc-Xc", "yswmmRZFItk"], "start_seconds": ["40", "0"], "properties": ["cartoon character, music, vocalize", "croak, vocalize, frog"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "a close up of a frog in the water"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a frog is croaking"], "question": "which entity is a frog?", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["xl2PIWyXaM", "wwyfGO2J4"], "start_seconds": ["160", "90"], "properties": ["chirp, man, younger person", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and people 
are talking", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["an insect buzzes around continuously", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["v25l1jef3JY", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["buzzes, continuously, insect", "airplane, boy, fly"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a man speaks followed by another man speaking outside"], "sample_ids": ["vf9xf3vMsGM", "viuTg1M-dqg"], "start_seconds": ["540", "30"], "properties": ["A man speaks while turning a water faucet on.", "two men, speak, follow"], "captions_pred_video": ["of the person washing their hands under the faucet", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["food is frying while a woman speaks", "a car accelerates and wind blows"], "sample_ids": ["yhQ2Lg-7qDY", "u0TrcHhkPQ"], "start_seconds": ["130", "20"], "properties": ["food, woman, speak", "accelerates, wind, blows"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["women speak as water runs briefly, children 
call out, and a man speaks", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["uRExseg-0XI", "w34HjHr6gAY"], "start_seconds": ["210", "30"], "properties": ["woman, man, water", "beeps, hit, woman"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a clock ticktocks"], "sample_ids": ["y2bVZ7rz-5M", "v-g-j2uTByM"], "start_seconds": ["280", "30"], "properties": ["engine, horn, siren", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a man talks followed by a woman shouting"], "sample_ids": ["sEprKHm8Sj8", "s3cTDAj31g"], "start_seconds": ["90", "80"], "properties": ["car, tires, slows", "man, talk, woman"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 
2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "a man speaks while water trickles and flows"], "sample_ids": ["x6ijhqRY38s", "sapQIQUhFc"], "start_seconds": ["250", "280"], "properties": ["bowl, silverware, man", "water, trickles, flow"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking and a stream is flowing in the background "], "question": "which entity is about water flowing?", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zk-xJGQU8-4", "uEU-Hg5MTN8"], "start_seconds": ["130", "27"], "properties": ["food, man, woman", "a woman, laughs, animal"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "people applaud and hoot and chat quietly"], "sample_ids": ["y2bVZ7rz-5M", "wwyfGO2J4"], "start_seconds": ["280", "90"], "properties": ["motor noise, horn, siren", "people, applaud, hoot"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "people are clapping and speaking with background noise 
"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a machine runs", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vD6lYD1l0BY", "yajyRTUQk3U"], "start_seconds": ["330", "400"], "properties": ["a, machine, run", "a woman, something, fried"], "captions_pred_video": ["game controller being held in the hands of the person", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["some clanking with distant murmuring", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["uMTTDZ2mb4", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["clanking, murmuring, distant", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a duck quacks continuously", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vh30P49Po6s", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["quacks, continuously, duck", "applause, audience, yells"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["people speak softly as food sizzles", "water flows as men speak and yell"], "sample_ids": ["yhQ2Lg-7qDY", "vJ7JPEFhyLA"], "start_seconds": ["130", "16"], "properties": ["food, 
sizzle, speak", "water, flow, men"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["food is frying then a woman speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["ukxt9I7eMMg", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["food, woman, speak", "applause, audience, yells"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a propeller rotates loudly and intensely"], "sample_ids": ["vXlk0lIQBFo", "ugHJF0hfYkg"], "start_seconds": ["470", "10"], "properties": ["wind, talk, vocalize", "loud, intense, propeller"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["xO-Q2BlIIPU", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["two men, exclamation, speak", "a, scream, girl"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a 
man is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "an airplane engine spools and people speak"], "sample_ids": ["s4Uz1Ffgo04", "wTjoRj1se3U"], "start_seconds": ["100", "390"], "properties": ["roars, background, people speaking", "airplane, engine, spool"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a jet engine is running and people are talking"], "question": "which entity is quieter", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a man speaks as a vehicles passes by then a woman speaks"], "sample_ids": ["xjvTpk2Zpr8", "siJFXfGWgDk"], "start_seconds": ["70", "50"], "properties": ["engine, run, wind", "man, woman, vehicle"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking and birds are chirping in the background "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "paper folding and crinkling"], "sample_ids": ["xZepNM9qcRA", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["background, motor, run", "paper, fold, crinkle"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper 
youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "the wind blows and a mouse clicks "], "question": "which entity is more likely to be used in a classroom", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "someone is typing on a computer keyboard"], "sample_ids": ["x5cuQjOdM3E", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["cat, talk, meow", "keyboard, type, computer"], "captions_pred_video": ["a black background with an airplane flying in the sky", "how to make money on youtube in spanish"], "captions_pred_audio": ["a cat meows and a woman speaks", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wsHBIgzs9Fs", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["horn, continuous, buzzing", "female, spraying, scream"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a woman is speaking and a baby is crying"], "question": "which entity is more abrasive", "label": 1}, 
{"captions": ["a sleeping person emits a gravely snore", "an engine runs loudly"], "sample_ids": ["w2JXXIAdUdg", "vqZuVbG6-HI"], "start_seconds": ["10", "130"], "properties": ["emits, sleeping, person", "loud, engine, run"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["siJFXfGWgDk", "wwyfGO2J4"], "start_seconds": ["50", "90"], "properties": ["man, woman, vehicle", "people, applaud, hoot"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", null], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "an airplane engine runs"], "sample_ids": ["wRV8yMk886E", "yVPZ2MNWpms"], "start_seconds": ["0", "0"], "properties": ["liquid, spray, nozzle", "engine, airplane, runs"], "captions_pred_video": ["two cars are parked in a parking lot at night", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a car is driving by on the road "], "question": "which entity is a machine", "label": 1}, {"captions": ["a male speaks and another male speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["viuTg1M-dqg", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["two males, speaking, male", "people, applaud, hoot"], "captions_pred_video": ["footage of water coming out of a hole in the ground", null], "captions_pred_audio": ["a man is speaking with background noise and breathing 
sounds ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "music plays followed by gunshots and then an explosion"], "sample_ids": ["vf44CgrjT0A", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["loud, long, person", "music, gunshots, explosion"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a loud burp", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is louder", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sAam2NqGhLY", "zj2R0XoFr5k"], "start_seconds": ["20", "50"], "properties": ["snoring, breathing, child", "airplane, boy, fly"], "captions_pred_video": ["of a little girl sleeping on a couch", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person is snoring", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a man speaks followed by another man speaking outside"], "sample_ids": ["xjvTpk2Zpr8", "viuTg1M-dqg"], "start_seconds": ["70", "30"], "properties": ["engine, run, wind", "two men, speak, follow"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a single person speaking?", "label": 0}, {"captions": ["some people speak", "a man speaks as a motor runs in the background"], "sample_ids": 
["vbZ-0lGPneg", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "background, motor, run"], "captions_pred_video": ["of a man holding a baby duck in his hands", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vbZ-0lGPneg", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["a woman, a television program, a bird", "gun, shoot, water"], "captions_pred_video": ["of a man holding a baby duck in his hands", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["w6RTHR6AeAg", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["call, owl, screech", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["u5RmF3c3Aw", "xfaoyyzw2WU"], "start_seconds": ["60", "180"], "properties": ["engine, car, zoom", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an 
airplane on the tarmac at an airport"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows strongly", "a man speaks followed by another man speaking outside"], "sample_ids": ["w8uLijTqtlU", "viuTg1M-dqg"], "start_seconds": ["70", "30"], "properties": ["wind, blows, strongly", "two men, speak, follow"], "captions_pred_video": ["footage is blurry and shaky", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sDSppXIlJrs", "sLUnaPT5gM8"], "start_seconds": ["27", "0"], "properties": ["microphone, water, wind", "loud, laughter, intermittent"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more like a scream", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "an adult man speaks over glass clinking"], "sample_ids": ["uYT5gxnyMWM", "u6jIvCtKarQ"], "start_seconds": ["50", "70"], "properties": ["a, scream, girl", "a, man, speaks"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of a person using a blender on a stove top"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and dishes are being moved with background noise "], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks 
and is typing on a keyboard", "an airplane engine runs"], "sample_ids": ["x9JovgqUcs", "yVPZ2MNWpms"], "start_seconds": ["500", "0"], "properties": ["a, man, speaks, keyboard", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a car is driving by on the road "], "question": "which is a moving object", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["xyL9F5VrjkE", "uZesmtKZGSw"], "start_seconds": ["20", "250"], "properties": ["wind, blows, vehicle", "men, talk, cars"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wyllXV6PjKo", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["a kid, talk, cry", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman speaks and a baby cries", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman 
talks while a baby cries and a man whispers", "a grown man speaks and water bubbles and runs"], "sample_ids": ["smDKStoHBJo", "vSeGhaZt-aI"], "start_seconds": ["0", "50"], "properties": ["a, talk, baby, cry", "water, bubbles, run"], "captions_pred_video": ["a man holding a crying baby in his arms", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking and pouring liquid with background noise "], "question": "which entity has a baby?", "label": 0}, {"captions": ["a frog vocalizes while birds chirp", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vMf1dLD6Sng", "y8WEcpOlT3I"], "start_seconds": ["6", "40"], "properties": ["frog, bird, vocalize", "harsh, wind, blows"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a frog croaks loudly", "a man is speaking with wind noise in the background "], "question": "which entity is not a frog?", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "a duck quacks continuously"], "sample_ids": ["t97k0cejSQE", "vh30P49Po6s"], "start_seconds": ["250", "30"], "properties": ["bird, chirp, insect", "quacks, continuously, duck"], "captions_pred_video": ["a bee on a purple thistle flower", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 0}, {"captions": ["a baby laugh at a sputter", "a man speaks as a car is passing by"], "sample_ids": ["sLUnaPT5gM8", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["laugh, sputter, baby", "a, car, pass"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a man talks while vehicles pass by", "a telephone rings followed by a woman talking"], "sample_ids": ["sK4u5T8hW78", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["a, man, talk", "ring, talk, woman"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "three men talk while wind blows and some liquid flows"], "sample_ids": ["tOj4tdLRaA", "vJ7JPEFhyLA"], "start_seconds": ["70", "16"], "properties": ["woman, laugh, baby", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "an 
airplane engine spools and people speak"], "sample_ids": ["vzceMbklWc", "wTjoRj1se3U"], "start_seconds": ["180", "390"], "properties": ["water, faucet, sink", "airplane, engine, spool"], "captions_pred_video": [null, "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["water is running and a man is speaking", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "a frog croaks as other frogs croak in the background"], "sample_ids": ["tw76HGONaKg", "yswmmRZFItk"], "start_seconds": ["570", "0"], "properties": ["A, game, keyboard", "background, frog, croak"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "a close up of a frog in the water"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["a stream runs then someone speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wbHTKEJZyhc", "sSMl2vc3ek"], "start_seconds": ["20", "20"], "properties": ["stream, run, someone", 
"loud, multiple, distance"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", null], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaking with light rustling", "people cheer as a vehicle engine revs"], "sample_ids": ["zOZleIRqZm4", "xjhAnI2q6hM"], "start_seconds": ["80", "6"], "properties": ["light, rustling, man", "engine revs, vehicle, people"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["people clap and speak in the distance", 
"multiple motorcycles pass by as a man speaks"], "sample_ids": ["wwyfGO2J4", "zcDwZ6W7E3E"], "start_seconds": ["90", "180"], "properties": ["clap, distance, speak", "man, speak, motorcycles"], "captions_pred_video": [null, "2 people riding motorcycles down a mountain road with trees lining the sides of the road"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking while a car accelerates and revs its engine "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["someone whistles a tune", "people speak softly as food sizzles"], "sample_ids": ["sIXTftIuUgw", "yhQ2Lg-7qDY"], "start_seconds": ["90", "130"], "properties": ["someone, tune, whistle", "food, sizzle, speak"], "captions_pred_video": [null, "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a person whistling a song", "a faucet is running and a man is speaking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "waves crash against a shoreline and people speak"], "sample_ids": ["uPDn2BFTHk", "yFB25fqfU8I"], "start_seconds": ["140", "300"], "properties": ["woman, laughs, speaks", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more calm", "label": 0}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "an insect buzzes around continuously"], "sample_ids": ["yZmhM1HcsyE", "v25l1jef3JY"], "start_seconds": ["4", "0"], "properties": ["engine, roar, water", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a 
motorboat speeds through water with wind noise in the background ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "dishes cling together then a man begins to speak"], "sample_ids": ["zkKdxzNC97Y", "sQGXqGcwOTc"], "start_seconds": ["27", "3"], "properties": ["hard, surface, door", "cling, speak, dishes"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a door is opened and closed", "mechanisms are operating and water is splashing "], "question": "which entity is about a door slamming shut?", "label": 0}, {"captions": ["loud intermittent buzzing with intermittent laughter", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sLUnaPT5gM8", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["loud, laughter, intermittent", "three men, wind, flow"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vBHyYJ8pL0", "wDVMhEdTiVw"], "start_seconds": ["2", "30"], "properties": ["noise, door, opening", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man speaks while playing a video 
game on a keyboard", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["tw76HGONaKg", "ziUT9IFTkjg"], "start_seconds": ["570", "10"], "properties": ["A, game, keyboard", "background, birds, rustling"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "birds are chirping and a chime is ringing "], "question": "which entity is a video?", "label": 0}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "a car accelerates and wind blows"], "sample_ids": ["xNMovAf3o50", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["rain, thunder, music", "accelerates, wind, blows"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", null], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a race car accelerates and revs its engine "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a small engine spits as it runs", "people applaud and hoot and chat quietly"], "sample_ids": ["sZvwOuuPGP0", "wwyfGO2J4"], "start_seconds": ["50", "90"], "properties": ["spits, 
engine, runs", "people, applaud, hoot"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", null], "captions_pred_audio": ["a medium engine is running ", "people are clapping and speaking with background noise "], "question": "which entity is a human activity", "label": 1}, {"captions": ["a person is whistling a tune", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["scYRUkrFLiQ", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["a, tune, whistle", "harsh, wind, blows"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with wind noise in the background "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "some tunes played by whistling"], "sample_ids": ["zcDwZ6W7E3E", "u6BnG6YZqJ4"], "start_seconds": ["180", "0"], "properties": ["a, man, speak", "tune, play, whistling"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a person whistles a meandering tune", "a man sprays as a scraping occurs in the background"], "sample_ids": ["uFoga8sHpiw", "sOa7g-44Dag"], "start_seconds": ["90", "30"], "properties": ["person, tune, whistle", "background, man, spray"], "captions_pred_video": ["footage of a bird in a cage", "footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic"], "captions_pred_audio": ["a person whistles a song", "a man is speaking and rubbing his hands together "], 
"question": "which entity is more active", "label": 1}, {"captions": ["a helicopter engine runs", "pigeons vocalize and birds chirp"], "sample_ids": ["t5ZbXbniOWk", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["engine, helicopter, run", "vocalize, bird, chirp"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "of the pigeon in the cage"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a man is filing a hard object", "a man speaks as a motor runs in the background"], "sample_ids": ["vveS8HT7Uog", "xZepNM9qcRA"], "start_seconds": ["100", "30"], "properties": ["a man, hard, object", "background, motor, run"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which object is harder to file", "label": 0}, {"captions": ["continuous chugging with birds chirping in the background", "a frog croaks as other frogs croak in the background"], "sample_ids": ["xM4joTqDVp4", "yswmmRZFItk"], "start_seconds": ["160", "0"], "properties": ["background, chirp, birds", "background, frog, croak"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "a close up of a frog in the water"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a frog is croaking"], "question": "which entity is a frog?", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tDVADusiIoc", "vb1fPSDI4c"], 
"start_seconds": ["60", "30"], "properties": ["water, radio, man", "multiple, people, yell"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["an emergency siren wails as it passes", "people talk quietly in the distance, followed by a police car siren wailing"], "sample_ids": ["vGj1XLJvNrw", "wy1eKjR7KC0"], "start_seconds": ["0", "30"], "properties": ["wails, wails, pass", "people, talk, distance"], "captions_pred_video": ["footage of a police car driving down a city street", "two police officers riding motorcycles down the street"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a man is speaking and a siren is going off"], "question": "which entity is wails as it passes?", "label": 0}, {"captions": ["two women and a man talk while a kid cries", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wyllXV6PjKo", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["a kid, talk, cry", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman is speaking and a baby is crying"], "question": "which entity has a female spraying and a female screaming?", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "three men talk while wind blows and some liquid flows"], "sample_ids": ["w2M4i1mklOA", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["alarm, gears, turn", "three men, wind, flow"], "captions_pred_video": ["footage of an antique clock", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking while the wind 
is blowing and water is splashing"], "question": "which entity shows a clock?", "label": 0}, {"captions": ["people speak as gunfire rings out", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["wqTCwqVRDlk", "w5W5Kqtc8E"], "start_seconds": ["80", "100"], "properties": ["gunfire, ring, speak", "wind, blow, vehicle"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", null], "captions_pred_audio": ["a man is speaking and a gun is fired", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "multiple people speak and children yell while water gurgles"], "sample_ids": ["v5P-ThUCINM", "vb1fPSDI4c"], "start_seconds": ["400", "30"], "properties": ["background, chirp, bird", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and birds are chirping", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "water is sprayed across a hard surface"], "sample_ids": ["sa6TLVbooCc", "sQwlkXjQabo"], "start_seconds": ["240", "10"], "properties": ["people, laugh, child", "water, spray, surface"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a beep repeats multiple times", "a person snores loudly multiple times at a close distance"], "sample_ids": ["y682ml90jGw", "sSMl2vc3ek"], "start_seconds": ["11", "20"], "properties": ["beep, repeat, multiple", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is 
being made ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["food is frying and sizzles", "a toilet flushes and water drains"], "sample_ids": ["zNRChLjqcU", "sfAvvZwdLCY"], "start_seconds": ["220", "20"], "properties": ["food is frying, sizzles, food", "water drains, flushes, water"], "captions_pred_video": [null, "footage of the toilet in the bathroom"], "captions_pred_audio": ["water is running from a faucet into a sink", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a man talks while vehicles pass by", "water flows as men speak and yell"], "sample_ids": ["sK4u5T8hW78", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["a, man, talk", "water, flow, men"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a man talking while vehicles pass by?", "label": 0}, {"captions": ["multiple ducks quack continuously", "an infant crying frantically"], "sample_ids": ["wfHeoPDLMaM", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["multiple, quack, continuously", "cry, infant, frantically"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the 
barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "of the baby crying in the car seat"], "captions_pred_audio": ["ducks are quacking", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vf9xf3vMsGM", "su6FAOcOA8c"], "start_seconds": ["540", "4"], "properties": ["A man speaks while turning a water faucet on.", "engine, idle, woman"], "captions_pred_video": ["of the person washing their hands under the faucet", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a woman is speaking and a subway train is moving "], "question": "which entity is a man speaking while turning a water faucet on?", "label": 0}, {"captions": ["a man talks followed by a woman shouting", "a machine beeps continuously"], "sample_ids": ["s3cTDAj31g", "y682ml90jGw"], "start_seconds": ["80", "11"], "properties": ["man, talk, woman", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a baby is crying", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["water flows as men speak and yell", "rustling occurs, ducks quack and water splashes, followed by an adult 
female and adult male speaking and duck calls being blown"], "sample_ids": ["vJ7JPEFhyLA", "vfYTJq7nU"], "start_seconds": ["16", "130"], "properties": ["water, flow, men", "rustling, ducks, quack"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a duck quacks and a woman speaks"], "question": "which entity is about water?", "label": 0}, {"captions": ["wind blows as people chatter quietly", "people applaud and hoot and chat quietly"], "sample_ids": ["xBxDz0CFVn0", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["wind, chatter, people", "people, applaud, hoot"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a person is whistling", "plastic is tapped on while someone speaks"], "sample_ids": ["sIXTftIuUgw", "wvKpEYswXO0"], "start_seconds": ["90", "150"], "properties": ["person, whistling, person", "plastic, tap, speak"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["a person whistling a song", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a sleeping person emits a gravely snore"], "sample_ids": ["ylpYOorfH4o", "w2JXXIAdUdg"], "start_seconds": ["410", "10"], "properties": ["motor, run, steady", "emits, sleeping, person"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace 
the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a close up shot of a person's mouth with a toothbrush in it"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a person snoring and a dog whimpering"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a person screams glaringly", "a man speaks as a car is passing by"], "sample_ids": ["xC8kbrKJmco", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["glaringly, screams, person", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a goat is bleating ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more passive", "label": 1}, {"captions": ["water flows as men speak and yell", "a man speaks as a car is passing by"], "sample_ids": ["vJ7JPEFhyLA", "sK4u5T8hW78"], "start_seconds": ["16", "30"], "properties": ["water, flow, men", "a, car, pass"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more calm", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "birds chirp and objects are moved around"], "sample_ids": ["w9lpbUn0hPc", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["male, wind, rustling", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a clock ticktocks"], "sample_ids": ["xBxDz0CFVn0", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["stream, water, flow", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage is blurry and out of focus", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a clock is ticking loudly"], "question": "which entity is a clock", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "a cat meows as a young woman speaks"], "sample_ids": ["zj2R0XoFr5k", "x5cuQjOdM3E"], "start_seconds": ["50", "30"], "properties": ["airplane, fly, overhead", "cat, meows, young woman"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "a black 
background with an airplane flying in the sky"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a cat meows and a woman speaks"], "question": "which entity is a pet", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["tEE3MpBt1sg", "zFjIWfSD-4"], "start_seconds": ["50", "410"], "properties": ["drill, something, laugh", "People, motor, brakes"], "captions_pred_video": ["footage is blurry due to the smoke in the air", null], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a drill?", "label": 0}, {"captions": ["children cheer as a man speaks then an audience screams", "paper folding and crinkling"], "sample_ids": ["vJvryTwuAV8", "zPpG3RD8lSs"], "start_seconds": ["16", "20"], "properties": ["audience, cheer, man", "paper, fold, crinkle"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], 
"captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of folding and crinkling paper?", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["ylpYOorfH4o", "xKB8O8LTs6s"], "start_seconds": ["410", "70"], "properties": ["engine, running, wind", "music, gunfire, explosion"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and an engine is revving", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "pigeons vocalize and birds chirp"], "sample_ids": ["vz8868znkVQ", "uiS58TNyUiw"], "start_seconds": ["60", "430"], "properties": ["audio, click, kid speaking", "vocalize, bird, chirp"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", "of the pigeon in the cage"], "captions_pred_audio": ["a baby is laughing and breathing with background noise ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["multiple birds vocalize and wind 
blows", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["uoGVs9yUqY4", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["multiple, vocalize, wind", "a woman, laughs, animal"], "captions_pred_video": ["for how to make a wooden shed door youtube", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vJ7JPEFhyLA", "xKB8O8LTs6s"], "start_seconds": ["16", "70"], "properties": ["three men, wind, flow", "music, gunfire, explosion"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more action", "label": 1}, {"captions": ["a child speaks in closed space", "a infant makes noise and is excited"], "sample_ids": ["yW6FWLSLkx4", "wIJK3-5y0kA"], "start_seconds": ["40", "30"], "properties": ["child, space, speak", "noise, excited, infant"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a baby cries and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["w0xsN8X18Y", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["music, surface, rain", "gun, 
shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause water to slosh", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "a man speaks as a motor runs in the background"], "sample_ids": ["vYkA3cfXp5Q", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["engine, accelerate, idle", "background, motor, run"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["an engine is idling", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video of a vehicle engine?", "label": 0}, {"captions": ["people speak and laugh as a child speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["sa6TLVbooCc", "xjhAnI2q6hM"], "start_seconds": ["240", "6"], "properties": ["people, laugh, child", "engine revs, vehicle, people"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["an airplane engine spools and people speak", "people speak as gunfire rings out"], "sample_ids": ["wTjoRj1se3U", "wqTCwqVRDlk"], "start_seconds": ["390", "80"], "properties": ["airplane, engine, spool", "gunfire, ring, speak"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a jet engine is running and people are talking", 
"a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["a train engine runs and a horn blows", "a stream of water runs briefly"], "sample_ids": ["zPX9o1uDiI", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["engine, horn, run", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a car is driving on a wet road "], "question": "which entity is a moving object", "label": 0}, {"captions": ["a stream runs then someone speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wbHTKEJZyhc", "tdWhHV3X25Q"], "start_seconds": ["20", "60"], "properties": ["stream, run, someone", "applause, audience, yells"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with 
trees and", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "a clock ticktocks"], "sample_ids": ["xvDdE3zNf8Y", "v-g-j2uTByM"], "start_seconds": ["120", "30"], "properties": ["A, crumple, paper", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman speaks and crumples paper", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks as crickets sing", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["ryFDPxgDOGc", "tdWhHV3X25Q"], "start_seconds": ["570", "60"], "properties": ["a, crickets, sing", "applause, audience, yells"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["someone snores nearby", "paper folding and crinkling"], "sample_ids": ["spJCm8tD9Zo", "zPpG3RD8lSs"], "start_seconds": ["90", "20"], "properties": ["someone snores, nearby, someone", "paper, fold, crinkle"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a 
piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a person is snoring loudly", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "wind blowing followed by a zoom"], "sample_ids": ["yVumC9TGknc", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["humming, clock, birds", "wind, blow, zoom"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a series of beeps and chirps", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "an infant crying as a woman laughs"], "sample_ids": ["tw76HGONaKg", "xhmRY9yhC7c"], "start_seconds": ["570", "20"], "properties": ["music, click, man", "a, laugh, infant"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of 
zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a toilet flushes and water drains", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sfAvvZwdLCY", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["water drains, flushes, water", "female, spraying, scream"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["smGI3C1NZc", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["water, drain, toilet", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking and laughing?", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a man speaks as a car is passing by"], "sample_ids": ["su6FAOcOA8c", "sK4u5T8hW78"], "start_seconds": ["4", "30"], "properties": ["engine, run, woman", "a, car, pass"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a woman making an announcement?", "label": 0}, {"captions": ["an engine sputters followed by a car zooming by", "females talk and laugh over gusting wind"], "sample_ids": ["u5RmF3c3Aw", "un9VQlzgZM"], "start_seconds": ["60", "5"], "properties": ["engine, car, zoom", "females, talk, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is talking", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["vr8ZXjEBhMQ", "vlS6YMeWAPo"], "start_seconds": ["150", "40"], "properties": ["wind, blow, zoom", "sheep, baa, birds"], "captions_pred_video": ["is taken from a motorcycle's point of view", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a goat bleats and birds chirp"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a person is whistling a tune", "water splashes as an animal walks through"], "sample_ids": ["scYRUkrFLiQ", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["a, tune, whistle", "animal, water, splashes"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "footage of a group of people riding horses 
through a river"], "captions_pred_audio": ["a person whistling a song", "water splashes and gurgles as people speak"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wAAkbZToh8", "vb1fPSDI4c"], "start_seconds": ["0", "30"], "properties": ["burp, laugh, speak", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man burps and a woman speaks", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a train horn blows as it passes by", "a vehicle accelerates and squeals tires"], "sample_ids": ["zVacuqSb4LI", "yRx9txMcBl0"], "start_seconds": ["30", "40"], "properties": ["horn, blows, train", "accelerates, tires, squeals"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods 
cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a car is revving its engine and skidding "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wsHBIgzs9Fs", "wz7N8YRy74I"], "start_seconds": ["50", "30"], "properties": ["horn, continuous, buzzing", "rooster, crow, background, men"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a bird?", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "some tunes played by whistling"], "sample_ids": ["uKCSGgof8gI", "u6BnG6YZqJ4"], "start_seconds": ["12", "0"], "properties": ["chirps, distance, signal", "tune, play, whistling"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "water pouring and bubbling"], "sample_ids": ["yI-KvObbDoY", "uyRfq-jKPpo"], "start_seconds": ["260", "50"], "properties": ["sound, smack, wind", "water, bubbles, pouring"], "captions_pred_video": ["of a man 
hanging clothes on a clothesline in the backyard", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as horns blow", "winds blows roughly as a vehicle races past"], "sample_ids": ["tHyNqRyK34A", "xjvTpk2Zpr8"], "start_seconds": ["24", "70"], "properties": ["a, man, speaks", "wind, blows, vehicle"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a jet engine roars and wind blows "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yYEVLuqEytU", "zl9Dqx-j7q4"], "start_seconds": ["40", "6"], "properties": ["animal, pig, background", "engine, laugh, loud"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage of a man driving a car in the dark"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a 
jet engine roars "], "question": "which entity is followed by a man laughing?", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["tPJvjq9QePY", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["animal, bleat, moo", "background, motor, run"], "captions_pred_video": ["a dog and a sheep in a barn", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a baby cries and a man speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a man speaks as a car is passing by"], "sample_ids": ["vdoxuJn9lTc", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["burp, loud, girl", "a, car, pass"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "a guy speaks with birds chirping in the background"], "sample_ids": ["uWAAAL4CIoc", "v5P-ThUCINM"], "start_seconds": ["0", "400"], "properties": ["a woman, chirps, animal", "background, chirp, bird"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man 
is speaking and birds are chirping"], "question": "which entity has a bird chirping in the background?", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "waves crash against a shoreline and people speak"], "sample_ids": ["v0x1odnXtP0", "yFB25fqfU8I"], "start_seconds": ["210", "300"], "properties": ["keyboard, type, computer", "wave, crash, shoreline"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a person is typing on a keyboard", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which is a natural phenomenon", "label": 1}, {"captions": ["a male speaks and another male speaks", "a man speaks as a machine runs"], "sample_ids": ["viuTg1M-dqg", "vD6lYD1l0BY"], "start_seconds": ["30", "330"], "properties": ["two males, speaking, male", "a, machine, run"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "game controller being held in the hands of the person"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and dishes are being washed "], "question": "which entity has a machine running", "label": 1}, {"captions": ["a consistent ticking pattern", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sCeWURVHfOM", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["ticking, pattern, clock", "music, gunfire, explosion"], "captions_pred_video": ["- a close-up view of the clock's inner workings", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["ticking of a clock", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "some men converse over an engine running"], "sample_ids": ["w2M4i1mklOA", "sCiy7QS1U"], "start_seconds": 
["30", "300"], "properties": ["alarm, gears, turn", "men, converse, engine"], "captions_pred_video": ["footage of an antique clock", null], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a clock?", "label": 0}, {"captions": ["a telephone rings followed by a woman talking", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["tGcFnX0GHI", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["ring, talk, woman", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster crow?", "label": 1}, {"captions": ["people speak as gunfire rings out", "an infant crying frantically"], "sample_ids": ["wqTCwqVRDlk", "zwOBqeFTgiU"], "start_seconds": ["80", "30"], "properties": ["gunfire, ring, speak", "cry, infant, frantically"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "a car speeding up in the distance"], "sample_ids": ["uWAAAL4CIoc", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["a woman, chirps, animal", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "a woman speaks as she rubs two objects together"], "sample_ids": ["vddP56-ogds", 
"vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["liquid, laughs, man", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["an animal quacks rapidly", "a loud engine muffles a man as he speaks"], "sample_ids": ["vh30P49Po6s", "xyx6eNVEYRY"], "start_seconds": ["30", "380"], "properties": ["animal, quacks, rapidly", "loud, engine, muffles"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a helicopter landing on a runway at an airport"], "captions_pred_audio": ["a duck is quacking loudly", "an aircraft engine is running and a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "a vehicle engine runs and someone speaks"], "sample_ids": ["w5W5Kqtc8E", "zF8yoL0rkbI"], "start_seconds": ["100", "30"], "properties": 
["wind, blow, vehicle", "engine, run, someone"], "captions_pred_video": [null, "footage of the traffic on the street at night"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "the wind is blowing hard and water is splashing"], "question": "which entity has a vehicle engine running and someone speaking?", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "an infant crying as a woman laughs"], "sample_ids": ["wyllXV6PjKo", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["a baby, a woman, a man", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman speaks and a baby cries", "a baby cries and a woman speaks"], "question": "which entity is a video of a baby crying?", "label": 0}, {"captions": ["a toilet flushes and water drains", "wind blowing followed by a zoom"], "sample_ids": ["sfAvvZwdLCY", "vr8ZXjEBhMQ"], "start_seconds": ["20", "150"], "properties": ["water drains, flushes, water", "wind, blow, zoom"], "captions_pred_video": ["footage of the toilet in the bathroom", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a toilet is flushed", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["birds chirp and wind blows", "vehicles pass by on a roadway"], "sample_ids": ["sxIvBMSavMQ", "tgbONvsP47Y"], "start_seconds": ["210", "0"], "properties": ["birds, chirp, wind", "pass, vehicle, roadway"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive 
beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a person sniffs and sneezes", "multiple people speak and children yell while water gurgles"], "sample_ids": ["uRlbY6aoBU", "vb1fPSDI4c"], "start_seconds": ["0", "30"], "properties": ["sneezes, person, sniffs", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is sneezing ", "a crowd of people are talking and laughing"], "question": "which entity is more active", "label": 1}, {"captions": ["water splashes and a door squeaks", "a telephone rings followed by a woman talking"], "sample_ids": ["sdXV-ylviw", "tGcFnX0GHI"], "start_seconds": ["190", "0"], "properties": ["sound, splash, door", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dog barks and taps with background noise ", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a frog croaks as other frogs croak in the background"], "sample_ids": ["ugHJF0hfYkg", "yswmmRZFItk"], "start_seconds": ["10", "0"], "properties": ["loud, intense, propeller", "background, frog, croak"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a close up of a frog in the water"], "captions_pred_audio": ["a helicopter is flying overhead ", "a frog is croaking"], "question": "which is quieter", "label": 1}, {"captions": ["water running down a sink while a man is talking", "two men and a woman talk while 
wind blows and birds tweet"], "sample_ids": ["vSeGhaZt-aI", "wqZ135Ssz0"], "start_seconds": ["50", "60"], "properties": ["water, sink, talk", "two men, woman, birds"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "a stream of water runs briefly"], "sample_ids": ["xl2PIWyXaM", "x-PeY8Yb8M4"], "start_seconds": ["160", "300"], "properties": ["chirp, man, younger person", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["birds are chirping and people are talking", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a machine runs continuously", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["wdXV3Pv0jiY", "yDoT73BWsdA"], "start_seconds": ["11", "10"], "properties": ["machine, running, continuously", "engine, revs, vehicle"], "captions_pred_video": ["footage is blurry and shaky", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a race car accelerates and revs its engine "], "question": "which machine is running continuously", "label": 0}, {"captions": ["a man speaks as a machine runs", "people speak as gunfire rings out"], "sample_ids": ["vD6lYD1l0BY", "wqTCwqVRDlk"], "start_seconds": ["330", "80"], "properties": ["a, machine, run", "gunfire, ring, speak"], "captions_pred_video": ["game controller being held in the hands of the person", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking and a 
gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vddP56-ogds", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["liquid, laughs, man", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a weapon fires multiple times", "people applaud and hoot and chat quietly"], "sample_ids": ["sMC07Ucy7kg", "wwyfGO2J4"], "start_seconds": ["10", "90"], "properties": ["weapon, fire, multiple", "people, applaud, hoot"], "captions_pred_video": ["footage is from a car's point of view", null], "captions_pred_audio": ["people are clapping and speaking with background noise ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be used in a war", "label": 0}, {"captions": ["a cat meows and children speak", "people cheer as a vehicle engine revs"], "sample_ids": ["x5cuQjOdM3E", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["cat, speak, children", "engine revs, vehicle, people"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a cat meows and a woman speaks", "a truck is revving its engine and a man is speaking "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "waves crash against a shoreline and people speak"], "sample_ids": ["w2M4i1mklOA", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["loud, chime, bell", 
"wave, crash, shoreline"], "captions_pred_video": ["footage of an antique clock", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone is burping continuously", "a duck quacks loudly and continuously"], "sample_ids": ["y636gklDioE", "vh30P49Po6s"], "start_seconds": ["20", "30"], "properties": ["burps, burps, burps", "loud, continuous, quacks"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person burps loudly several times", "a duck is quacking loudly"], "question": "which entity is making a noise", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["s6DESzUTGjY", "uYT5gxnyMWM"], "start_seconds": ["16", "50"], "properties": ["wind, laugh, woman", "a, scream, girl"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "footage of a person spraying paint on the ceiling"], 
"captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "two men speak as a buffeting wind blows"], "sample_ids": ["s6DESzUTGjY", "y8WEcpOlT3I"], "start_seconds": ["16", "40"], "properties": ["wind, laugh, woman", "wind, speak, buffeting"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 0}, {"captions": ["some people speak", "a woman speaks happily and an animal chirps"], "sample_ids": ["vbZ-0lGPneg", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "a woman, chirps, animal"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a woman is speaking and a dog is barking "], "question": "which entity has a more 
chirpy animal", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "people applaud and hoot and chat quietly"], "sample_ids": ["tjmoSi330GM", "wwyfGO2J4"], "start_seconds": ["23", "90"], "properties": ["speed, water, boat", "people, applaud, hoot"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", null], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "people are clapping and speaking with background noise "], "question": "which entity is moving at a slower speed", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "paper folding and crinkling"], "sample_ids": ["wTideSjRFS0", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["food, sizzle, woman", "paper, fold, crinkle"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["an insect buzzes around continuously", "a vehicle engine runs and 
wind blows before women yell"], "sample_ids": ["v25l1jef3JY", "w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["buzzes, continuously, insect", "wind, blow, vehicle"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["water splashes and wind noise is made into a microphone", "a woman speaks followed by another woman whimpering and speaking"], "sample_ids": ["sDSppXIlJrs", "xOZfdgAgJ9o"], "start_seconds": ["27", "40"], "properties": ["microphone, water, wind", "woman, whimpering, speaking"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "footage of a woman talking to a man in a doctor's office"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a woman is speaking and a baby is crying"], "question": "which entity is a recording of a woman speaking?", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "an engine runs loudly"], "sample_ids": ["v0x1odnXtP0", "vqZuVbG6-HI"], "start_seconds": ["210", "130"], "properties": ["keyboard, type, computer", "loud, engine, run"], "captions_pred_video": ["how to make money on youtube in spanish", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a person is typing on a keyboard", "a lawn mower is running and men are speaking "], "question": "which is quieter", "label": 0}, {"captions": ["goats bleat and people speak", "a car accelerates and wind blows"], "sample_ids": ["z5iUE5h0EPs", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["goats bleat, people speak, language", "accelerates, wind, blows"], "captions_pred_video": ["of the goat in the barn", null], "captions_pred_audio": ["a goat bleats and a man speaks", "a race car accelerates and revs its engine "], 
"question": "which entity is moving", "label": 1}, {"captions": ["a man speaks as horns blow", "water flows as men speak and yell"], "sample_ids": ["tHyNqRyK34A", "vJ7JPEFhyLA"], "start_seconds": ["24", "16"], "properties": ["a, man, speaks", "water, flow, men"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "a person sniffs and sneezes"], "sample_ids": ["x9JovgqUcs", "uRlbY6aoBU"], "start_seconds": ["500", "0"], "properties": ["a, man, speaks, keyboard", "sneezes, person, sniffs"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a keyboard", "a man is sneezing "], "question": "which person is sick", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "someone whistles a tune"], "sample_ids": ["x9JovgqUcs", "sIXTftIuUgw"], "start_seconds": ["500", "90"], "properties": ["a, man, speaks, keyboard", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a keyboard", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vfYTJq7nU", "vJ7JPEFhyLA"], "start_seconds": ["130", "16"], "properties": ["rustling, ducks, quack", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a man is speaking while the wind is 
blowing and water is splashing"], "question": "which entity is about a duck?", "label": 0}, {"captions": ["two men speak as a buffeting wind blows", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["y8WEcpOlT3I", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["wind, speak, buffeting", "men, talk, cars"], "captions_pred_video": ["on how to use a sewing machine youtube", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a person is whistling", "a telephone rings followed by a woman talking"], "sample_ids": ["sIXTftIuUgw", "tGcFnX0GHI"], "start_seconds": ["90", "0"], "properties": ["person, whistling, person", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person whistling a song", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a person talking?", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wwyfGO2J4", "xfaoyyzw2WU"], "start_seconds": ["90", "180"], "properties": ["people, applaud, hoot", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["people are clapping 
and speaking with background noise ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a stream of water runs briefly"], "sample_ids": ["tK4VlLsNxak", "x-PeY8Yb8M4"], "start_seconds": ["120", "300"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "stream, water, run"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a heavy rain falls endlessly", "a man laughs and speaks as cats purr and hiss"], "sample_ids": ["wP8ZKrlx3oA", "vVhthZ45k3Y"], "start_seconds": ["40", "30"], "properties": ["heavy, rain, fall", "cat, purr, hiss"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage is blurry and out of focus"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking and a cat is meowing"], "question": "which entity is more likely to be a natural phenomenon", "label": 0}, {"captions": ["an engine starts and increases in power", "a car accelerates and wind blows"], "sample_ids": ["zjTG0gaGCUI", "u0TrcHhkPQ"], "start_seconds": ["80", "20"], "properties": ["power, increase, engine", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a jet engine roars as wind blows ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a small engine idles continuously", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["y5WII6cTH7k", "vbZ-0lGPneg"], "start_seconds": ["40", "30"], "properties": ["engine, idle, 
continuously", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a woman is speaking and a dog is whimpering"], "question": "which entity is not a television program?", "label": 0}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "some men converse over an engine running"], "sample_ids": ["xyL9F5VrjkE", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["wind, motor, distance", "men, converse, engine"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man speaks while a motorcycle revs and accelerates "], "question": "which is a more active scene", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sU53zg9Jp7s", "uYT5gxnyMWM"], "start_seconds": ["380", "50"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "female, spraying, scream"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more violent", "label": 1}, {"captions": ["food is frying while a woman speaks", "water pouring and bubbling"], "sample_ids": ["yhQ2Lg-7qDY", "uyRfq-jKPpo"], "start_seconds": ["130", "50"], "properties": ["food, woman, speak", "water, bubbles, pouring"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a faucet is running and a man is speaking", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a man speaks as a car is passing by"], "sample_ids": ["sWZzXuWYY", "sK4u5T8hW78"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "a heavy rain falls endlessly"], "sample_ids": ["sofxkNWaP0s", "wP8ZKrlx3oA"], "start_seconds": ["30", "40"], "properties": ["wind, engine, louder", "heavy, rain, fall"], "captions_pred_video": ["of the airplane taking off 
from the runway with the speed limit sign in the foreground", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a heavy rain is falling on a surface"], "question": "which entity is falling", "label": 1}, {"captions": ["water flows and trickles", "a child speaks in closed space"], "sample_ids": ["tB7hWb9gTuQ", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["water, flow, trickle", "child, space, speak"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["water is splashing and gurgling", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is not a liquid", "label": 1}, {"captions": ["a church bell rings several times", "a speedboat passes quickly on the water"], "sample_ids": ["sUVVjE3Ucp8", "tjmoSi330GM"], "start_seconds": ["0", "23"], "properties": ["ring, bell, several", "speed, water, boat"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["a church bell is ringing ", "a motorboat speeds through water with wind noise "], "question": "which entity is moving faster", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "a man speaks as birds chirp and a vehicle passes nearby"], "sample_ids": ["uiS58TNyUiw", "siJFXfGWgDk"], "start_seconds": ["430", "50"], "properties": ["vocalize, bird, chirp", "a, bird, vehicle"], "captions_pred_video": ["of the pigeon in the cage", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a man is speaking and birds are chirping in the background "], 
"question": "which entity is about birds?", "label": 0}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "people speak as gunfire rings out"], "sample_ids": ["yZrFNS7GFBQ", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["pigeon, buzzes, insect", "gunfire, ring, speak"], "captions_pred_video": ["of the bird in the cage", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["an animal quacks rapidly", "winds blows roughly as a vehicle races past"], "sample_ids": ["vh30P49Po6s", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["animal, quacks, rapidly", "wind, blows, vehicle"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a duck is quacking loudly", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["an airplane engine runs", "an insect buzzes around continuously"], "sample_ids": ["yVPZ2MNWpms", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["engine, airplane, runs", "buzzes, continuously, insect"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a car is driving by on the road ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tDVADusiIoc", "zj2R0XoFr5k"], "start_seconds": ["60", "50"], "properties": ["wind, radio, waves", "airplane, boy, fly"], "captions_pred_video": ["a 
person riding on the back of a sailboat in rough seas", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["people speak softly as food sizzles", "a car speeding up in the distance"], "sample_ids": ["yhQ2Lg-7qDY", "u0TrcHhkPQ"], "start_seconds": ["130", "20"], "properties": ["food, sizzle, speak", "distance, car, speed"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "loud clanking and banging with brief male speech"], "sample_ids": ["wSVhSdj0F0", "sWZzXuWYY"], "start_seconds": ["10", "420"], "properties": ["horn honks, keys jingle, slam", "male, speech, banging"], "captions_pred_video": [null, null], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a sewing machine runs and a man speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a woman speaks as she rubs two objects together"], "sample_ids": ["xKB8O8LTs6s", "vzxHnu-SFEw"], "start_seconds": ["70", "80"], "properties": ["music, gunfire, explosion", "two objects, woman, speak"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "paper folding and crinkling"], "sample_ids": ["uWPRNLnpy7Y", "zPpG3RD8lSs"], "start_seconds": ["10", "20"], "properties": ["accelerate, laugh, vehicle", "paper, fold, crinkle"], "captions_pred_video": ["is taken from a car driving down the street", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": 
["a car accelerates and revs its engine ", "the wind blows and a mouse clicks "], "question": "which entity is a paper folding and crinkling?", "label": 1}, {"captions": ["a consistent ticking pattern", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sCeWURVHfOM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["ticking, pattern, clock", "stream, water, flow"], "captions_pred_video": ["- a close-up view of the clock's inner workings", "footage is blurry and out of focus"], "captions_pred_audio": ["ticking of a clock", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "a man speaks followed by another man speaking outside"], "sample_ids": ["s4Uz1Ffgo04", "viuTg1M-dqg"], "start_seconds": ["100", "30"], "properties": ["water, rushes, vehicle", "two men, speak, follow"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["a child babbles as a woman speaks", "water splashes as an animal walks through"], "sample_ids": ["wEBlkGWVWwE", "w1ir-sZ3Im8"], "start_seconds": ["260", "90"], "properties": ["a, babble, woman", "animal, water, splashes"], "captions_pred_video": ["shows a person writing on the whiteboard", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a man speaks as a car is passing by"], 
"sample_ids": ["vzxHnu-SFEw", "sK4u5T8hW78"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "a, car, pass"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which object is moving", "label": 0}, {"captions": ["a person burps loudly for a long time nearby", "people applaud and hoot and chat quietly"], "sample_ids": ["vf44CgrjT0A", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["loud, long, person", "people, applaud, hoot"], "captions_pred_video": ["a man in a striped shirt with his mouth open in 
front of a bookshelf", null], "captions_pred_audio": ["a loud burp", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a man speaks as a car is passing by"], "sample_ids": ["rwtmaKiCcQU", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["nozzle, depressed, spray can", "a, car, pass"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["spraying and people speaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["an electronic device bleeps once", "paper is crumpling consistently"], "sample_ids": ["tHJ6JSa8Y4", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["bleeps, electronic, device", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a clock is ticking and beeping", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "several insects fly while two men talk"], "sample_ids": ["vbpKkWvfOu4", "s-T9OVOiMLo"], "start_seconds": ["560", "330"], "properties": ["a, woman, man", "several, fly, men"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more people", "label": 0}, {"captions": ["an emergency siren wails as it passes", "a man speaks as a car is passing by"], "sample_ids": ["vGj1XLJvNrw", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["wails, wails, pass", "a, car, pass"], "captions_pred_video": ["footage of a police car driving down a city street", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "a duck quacks continuously"], "sample_ids": ["wP8ZKrlx3oA", "vh30P49Po6s"], "start_seconds": ["40", "30"], "properties": ["rain, storm, thunder", "quacks, continuously, duck"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", 
"television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["t25U-v4k4ts", "vbZ-0lGPneg"], "start_seconds": ["40", "30"], "properties": ["bees buzz, birds chirp, man speaks", "a woman, a television program, a bird"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": ["someone snores nearby", "motors rev and run loudly as a person laughs"], "sample_ids": ["spJCm8tD9Zo", "zl9Dqx-j7q4"], "start_seconds": ["90", "6"], "properties": ["someone snores, nearby, someone", "motors rev, laugh, loudly"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a person is snoring loudly", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["xfaoyyzw2WU", "y8WEcpOlT3I"], "start_seconds": ["180", "40"], "properties": ["loud, jet engine, roar", "harsh, wind, blows"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "on how to use a sewing machine youtube"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a man is speaking with wind noise in the background "], "question": "which entity is louder", "label": 0}, {"captions": ["a person snores hilariously while someone laughs", "a speedboat passes quickly on the water"], "sample_ids": ["sSMl2vc3ek", "tjmoSi330GM"], "start_seconds": ["20", "23"], "properties": ["a person, laughs, snores", "speed, water, boat"], "captions_pred_video": [null, "a person riding a jet ski on a lake with trees in the background"], 
"captions_pred_audio": ["a person snoring loudly", "a motorboat speeds through water with wind noise "], "question": "which is moving faster", "label": 0}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "wind blows and women speak as livestock vocalizes"], "sample_ids": ["siJFXfGWgDk", "vXlk0lIQBFo"], "start_seconds": ["50", "470"], "properties": ["man, woman, vehicle", "wind, speak, vocalize"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "- a woman and two donkeys in a fenced in area"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "wind chimes are ringing and people are speaking and laughing "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["material crumbles into a microphone", "people talk quietly in the distance, followed by a police car siren wailing"], "sample_ids": ["vofpvUo6NAw", "wy1eKjR7KC0"], "start_seconds": ["220", "30"], "properties": ["material, crumbles, microphone", "people, talk, distance"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "two police officers riding motorcycles down the street"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a man is speaking and a siren is going off"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "birds chirp and objects are moved around"], "sample_ids": ["x5cuQjOdM3E", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["cat, meows, young woman", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a cat meows and a woman speaks", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles 
then more metal rumbling followed by a guy speaking", "several insects fly while two men talk"], "sample_ids": ["sQGXqGcwOTc", "s-T9OVOiMLo"], "start_seconds": ["3", "330"], "properties": ["audio, kid, giggles", "several, fly, men"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video?", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "a stream of water runs briefly"], "sample_ids": ["uKCSGgof8gI", "x-PeY8Yb8M4"], "start_seconds": ["12", "300"], "properties": ["chirps, distance, signal", "stream, water, run"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yI-KvObbDoY", "sSMl2vc3ek"], "start_seconds": ["260", "20"], "properties": ["sound, smack, wind", "loud, multiple, distance"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", null], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "continuous sizzling with a woman speaking towards the end"], "sample_ids": ["zdYdyF9-m8U", "ukxt9I7eMMg"], "start_seconds": ["7", "30"], "properties": ["wind, crash, shoreline", "continuous, woman, speaking"], "captions_pred_video": ["a person kayaking 
in the ocean near a cliff", "footage of a person preparing food on a stool in a kitchen"], "captions_pred_audio": ["waves crash and wind blows ", "a woman is speaking while food is frying in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a child speaks", "a toilet flushes and water drains"], "sample_ids": ["yW6FWLSLkx4", "sfAvvZwdLCY"], "start_seconds": ["40", "20"], "properties": ["a, child, speaks", "water drains, flushes, water"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vlS6YMeWAPo", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["sheep, baa, birds", "female, spraying, scream"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a goat bleats and birds chirp", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a man speaks as horns blow"], "sample_ids": ["sK4u5T8hW78", "tHyNqRyK34A"], "start_seconds": ["30", "24"], "properties": ["a, car, pass", "a, man, speaks"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai", "being taken from inside a vehicle on the street at night"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a car is honking with background noise "], "question": "which entity is about a man speaking as a car passes by?", "label": 0}, {"captions": ["a car accelerates and wind blows", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["u0TrcHhkPQ", "tw76HGONaKg"], "start_seconds": ["20", "570"], "properties": ["accelerates, wind, blows", "A, game, keyboard"], "captions_pred_video": [null, "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man speaks and types on a computer keyboard "], "question": "which entity is playing a game", "label": 1}, {"captions": ["a baby cries and a woman speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tMbMDvT50j8", "vb1fPSDI4c"], "start_seconds": ["12", "30"], "properties": ["a, cry, woman", "multiple, people, yell"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], 
"captions_pred_audio": ["a baby cries and a woman speaks", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "people speak as gunfire rings out"], "sample_ids": ["xNMovAf3o50", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["rain, thunder, music", "gunfire, ring, speak"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a train horn blows as it passes by"], "sample_ids": ["sLUnaPT5gM8", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["loud, laughter, intermittent", "horn, blows, train"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a train whistle blows and a train passes by with a whistle blowing "], "question": 
"which entity is not a train?", "label": 0}, {"captions": ["motors rev and run loudly as a person laughs", "a horn rings out as a machine runs by"], "sample_ids": ["zl9Dqx-j7q4", "slZLHwNbbt4"], "start_seconds": ["6", "300"], "properties": ["motors rev, laugh, loudly", "a, horn, run"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a jet engine roars ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "plastic is tapped on while someone speaks"], "sample_ids": ["vf44CgrjT0A", "wvKpEYswXO0"], "start_seconds": ["20", "150"], "properties": ["loud, long, person", "plastic, tap, speak"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a loud burp", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "an infant crying as a woman laughs"], "sample_ids": ["w2JXXIAdUdg", "xhmRY9yhC7c"], "start_seconds": ["10", "20"], "properties": ["snoring, distance, person", "a, laugh, infant"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["v5P-ThUCINM", "zj2R0XoFr5k"], "start_seconds": ["400", "50"], "properties": ["background, 
chirp, bird", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["waves of water rumble", "a car speeding up in the distance"], "sample_ids": ["vwqaIHKxLvM", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["sound, wave, water", "distance, car, speed"], "captions_pred_video": ["of a surfer riding a big wave in the ocean", null], "captions_pred_audio": ["waves crash and wind blows ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["sawing of wood and rustling with leaves blowing in the distance", "a man speaks as a motor runs in the background"], "sample_ids": ["uiItxDsDMFI", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["sound, distance, leaves", "background, motor, run"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a saw is being used with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sZPuqDgX2V0", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["engine, accelerate, intercom", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro 
gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity shows a man talking to the camera?", "label": 0}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "water pouring and bubbling"], "sample_ids": ["vqZuVbG6-HI", "uyRfq-jKPpo"], "start_seconds": ["130", "50"], "properties": ["background, male, female", "water, bubbles, pouring"], "captions_pred_video": ["footage is blurry because it's raining outside", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "water is running from a faucet"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a cat meows and children speak", "some tunes played by whistling"], "sample_ids": ["x5cuQjOdM3E", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["cat, speak, children", "tune, play, whistling"], 
"captions_pred_video": ["a black background with an airplane flying in the sky", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a cat meows and a woman speaks", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "someone is typing on a computer keyboard"], "sample_ids": ["tDVADusiIoc", "v0x1odnXtP0"], "start_seconds": ["60", "210"], "properties": ["wind, radio, waves", "keyboard, type, computer"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a person is typing on a keyboard"], "question": "which is not a type of device", "label": 0}, {"captions": ["distant men speak as a spray can nozzle is depressed", "an airplane engine runs"], "sample_ids": ["rwtmaKiCcQU", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["nozzle, depressed, spray can", "engine, airplane, runs"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["spraying and people speaking", "a car is driving by on the road "], "question": "which is a moving object", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "an engine works in idle nearby followed by a man talking"], "sample_ids": ["vhJWZheqaE", "wqADXCzngMw"], "start_seconds": ["0", "340"], "properties": ["water drains unevenly, toilet flushes, water drains", "engine, idle, man"], "captions_pred_video": [null, "of a man working on a vintage volkswagen beetle"], "captions_pred_audio": ["a toilet is flushed", "a lawn mower is running and a man is speaking "], "question": "which entity is a machine?", "label": 0}, {"captions": ["water splashing and 
a person laughs in the distance then a man speaks nearby", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vddP56-ogds", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["water, splash, person, laugh", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water?", "label": 0}, {"captions": ["long loud burping by a man", "wind blows as people chatter quietly"], "sample_ids": ["xmiUIOhtZyQ", "xBxDz0CFVn0"], "start_seconds": ["60", "30"], "properties": ["loud, burp, man", "wind, chatter, people"], "captions_pred_video": ["homer simpson drinking a beer", "footage is blurry and out of focus"], "captions_pred_audio": ["a person burps and music plays in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "a man speaks followed by another man speaking outside"], "sample_ids": ["wsHBIgzs9Fs", "viuTg1M-dqg"], "start_seconds": ["50", "30"], "properties": ["horn, continuous, buzzing", "two men, speak, follow"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking followed by another man speaking?", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["ylpYOorfH4o", "wz7N8YRy74I"], "start_seconds": ["410", "30"], "properties": 
["engine, running, wind", "rooster, crow, background, men"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["an engine runs and a man speaks", "a train horn blows as it passes by"], "sample_ids": ["yT5WfYMRr-U", "zVacuqSb4LI"], "start_seconds": ["30", "30"], "properties": ["engine, run, man", "horn, blows, train"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which train is blowing its horn", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yDoT73BWsdA", "sSMl2vc3ek"], "start_seconds": ["10", "20"], "properties": ["engine, revs, vehicle", "loud, multiple, distance"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["wSVhSdj0F0", "ukg5L09Wpvo"], "start_seconds": ["10", "150"], "properties": ["beep, clang, footsteps", "a train, a horn, a bell"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sYITalLZjj4", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["water, rushes, background, birds", "three men, wind, flow"], "captions_pred_video": ["two ducks are swimming in the water near each other", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["wind blows and birds chirp", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 0}, {"captions": ["a diesel truck engine runs 
continuously", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sZvwOuuPGP0", "su6FAOcOA8c"], "start_seconds": ["50", "4"], "properties": ["engine, diesel, truck", "engine, idle, woman"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a medium engine is running ", "a woman is speaking and a subway train is moving "], "question": "which entity has a running engine", "label": 0}, {"captions": ["water flows as a woman laughs and a man speaks", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["vddP56-ogds", "sapQIQUhFc"], "start_seconds": ["30", "280"], "properties": ["water, flow, laugh", "liquid, flow, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking and a stream is flowing in the background "], "question": "which entity shows a man speaking?", "label": 0}, {"captions": ["a cat meows and children speak", "water is sprayed across a hard surface"], "sample_ids": ["x5cuQjOdM3E", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["cat, speak, children", "water, spray, surface"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a cat meows and a woman speaks", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "dishes cling together then a man begins to speak"], "sample_ids": ["xyL9F5VrjkE", "sQGXqGcwOTc"], "start_seconds": ["20", "3"], "properties": ["wind, blows, vehicle", "cling, speak, dishes"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "a man washing dishes in a commercial kitchen"], 
"captions_pred_audio": ["the wind is blowing and a car is passing by ", "mechanisms are operating and water is splashing "], "question": "which entity is about a vehicle engine running?", "label": 0}, {"captions": ["an insect buzzes around continuously", "wind blowing followed by a zoom"], "sample_ids": ["v25l1jef3JY", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["buzzes, continuously, insect", "wind, blow, zoom"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a zoom?", "label": 0}, {"captions": ["a airplane flies overhead as a woman speaks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zj2R0XoFr5k", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["airplane, fly, woman", "a woman, laughs, animal"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a drill runs and two people laugh", "a stream of water runs briefly"], "sample_ids": ["tEE3MpBt1sg", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["two people, laugh, drill", "stream, water, run"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "water splashes as an animal walks 
through"], "sample_ids": ["xjvTpk2Zpr8", "w1ir-sZ3Im8"], "start_seconds": ["70", "90"], "properties": ["wind, blows, vehicle", "animal, water, splashes"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a jet engine roars and wind blows ", "water splashes and gurgles as people speak"], "question": "which entity is more calm", "label": 1}, {"captions": ["birds fly and flutter around", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wGKgwOP3h30", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["fly, flutter, around", "loud, laughter, intermittent"], "captions_pred_video": ["of the pigeons in the coop", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["pigeons coo and flap their wings", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zofjfKhqLk8", "wz7N8YRy74I"], "start_seconds": ["10", "30"], "properties": ["background, metal, clank", "rooster, crow, background, men"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "a horn rings out as a machine runs by"], "sample_ids": ["sYITalLZjj4", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["water, rushes, background, birds", "a, horn, run"], "captions_pred_video": ["two 
ducks are swimming in the water near each other", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["wind blows and birds chirp", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is more active", "label": 1}, {"captions": ["water rushes by", "a man talks as several small engines run"], "sample_ids": ["x-PeY8Yb8M4", "u9A6VZQCZpU"], "start_seconds": ["300", "30"], "properties": ["water, rushes, by", "a, man, talk"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", null], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking while a race car is revving and accelerating "], "question": "which entity is a video of a man talking?", "label": 1}, {"captions": ["a jet engine spools up and takes off", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vBslzh7saPw", "tiDFTC-5vU"], "start_seconds": ["90", "30"], "properties": ["engine, spools, takes", "male, duck, laugh"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a toilet flushes and a female speaks"], "sample_ids": ["zj2R0XoFr5k", "yaln9y8I7ms"], "start_seconds": ["50", "230"], "properties": ["airplane, boy, fly", "female, flushes, toilet"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "water flows 
followed by women screaming"], "sample_ids": ["zcDwZ6W7E3E", "w5W5Kqtc8E"], "start_seconds": ["180", "100"], "properties": ["man, speak, motorcycles", "water, flow, women"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["a vehicle engine runs and someone speaks", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["zF8yoL0rkbI", "t25U-v4k4ts"], "start_seconds": ["30", "40"], "properties": ["engine, run, someone", "a, chirps, bird"], "captions_pred_video": ["footage of the traffic on the street at night", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a man is speaking and bees are buzzing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["continuous sneezing together with speech", "a person is whistling"], "sample_ids": ["x4dZyf9Gbj0", "sIXTftIuUgw"], "start_seconds": ["130", "90"], "properties": ["continuous, sneeze, speech", "person, whistling, person"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a woman sneezes and speaks", "a person whistling a song"], "question": "which entity is a person", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "a helicopter engine runs"], "sample_ids": ["ukg5L09Wpvo", "t5ZbXbniOWk"], "start_seconds": ["150", "30"], "properties": ["clickety-clack, train, whistle", "engine, helicopter, run"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube"], 
"captions_pred_audio": ["a train blows its whistle and blows its horn ", "a helicopter is flying overhead "], "question": "which entity is a machine", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "people cheer as a vehicle engine revs"], "sample_ids": ["tEE3MpBt1sg", "xjhAnI2q6hM"], "start_seconds": ["50", "6"], "properties": ["drill, something, laugh", "engine revs, vehicle, people"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a truck is revving its engine and a man is speaking "], "question": "which is a vehicle", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a motor runs in the distance as a soft wind periodically gusts"], "sample_ids": ["xSKJGCItUWE", "xyL9F5VrjkE"], "start_seconds": ["10", "20"], "properties": ["engine, run, boy", "wind, motor, distance"], "captions_pred_video": ["footage of the helicopter flying in the room", "of a caterpillar truck loading logs into a trailer"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "the wind is blowing and a car is passing by "], "question": "which entity is a motor?", "label": 1}, {"captions": ["a woman and man are speaking", "frogs croak and vocalize"], "sample_ids": ["vbpKkWvfOu4", "yswmmRZFItk"], "start_seconds": ["560", "0"], "properties": ["two people, speaking, woman, man", "croak, vocalize, frog"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a close up of a frog in the water"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a frog is croaking"], 
"question": "which entity is not a person", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "a man speaks as a motor runs in the background"], "sample_ids": ["wqADXCzngMw", "xZepNM9qcRA"], "start_seconds": ["340", "30"], "properties": ["engine, idle, man", "background, motor, run"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks while water drains", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vSeGhaZt-aI", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["water, drain, man", "animal, grunts, snorts"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "wind blows as people chatter quietly"], "sample_ids": ["wTideSjRFS0", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["food, sizzle, woman", "wind, chatter, people"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "water is sprayed across a hard surface"], "sample_ids": ["u--KhUW8l1Y", "sQwlkXjQabo"], "start_seconds": ["0", 
"10"], "properties": ["engine, sound, horn", "water, spray, surface"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a fly buzzes around loudly as birds chirp"], "sample_ids": ["tQWGZLItBXk", "uJV8NDaHqqk"], "start_seconds": ["170", "100"], "properties": ["music, kid, speak", "loud, fly, chirp"], "captions_pred_video": ["worms revolution screenshots", "a bee hive in a wooden box"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a swarm of bees buzzing around"], "question": "which entity is louder", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a person snores loudly multiple times at a close distance"], "sample_ids": ["zFjIWfSD-4", "sSMl2vc3ek"], "start_seconds": ["410", "20"], "properties": ["People, motor, brakes", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["water splashes as an animal walks through", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["w1ir-sZ3Im8", "xKB8O8LTs6s"], "start_seconds": ["90", "70"], "properties": ["animal, water, splashes", "music, gunfire, explosion"], "captions_pred_video": ["footage of a group of people riding horses through a river", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["water splashes and gurgles as people speak", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 
1}, {"captions": ["a person is burping then speaks and laughs", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wAAkbZToh8", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["burp, laugh, speak", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man burps and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "a duck quacks loudly and continuously"], "sample_ids": ["yVumC9TGknc", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["humming, clock, birds", "loud, continuous, quacks"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a series of beeps and chirps", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a stream runs then someone speaks", "some men converse over an engine running"], "sample_ids": ["wbHTKEJZyhc", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["stream, run, someone", "men, converse, engine"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a 
river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", null], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a stream running?", "label": 0}, {"captions": ["a person screams glaringly", "wind blowing followed by a zoom"], "sample_ids": ["xC8kbrKJmco", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["glaringly, screams, person", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a goat is bleating ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a man speaks followed by another man speaking outside"], "sample_ids": ["xBxDz0CFVn0", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["stream, water, flow", "two men, speak, follow"], "captions_pred_video": ["footage is blurry and out of focus", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two men speaking?", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "a infant makes noise and is excited"], "sample_ids": ["sSMl2vc3ek", "wIJK3-5y0kA"], "start_seconds": ["20", "30"], "properties": ["loud, multiple, distance", "noise, 
excited, infant"], "captions_pred_video": [null, "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a person snoring loudly", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "children speak and play together"], "sample_ids": ["x5cuQjOdM3E", "yVVP8XvWJTo"], "start_seconds": ["30", "260"], "properties": ["cat, meows, young woman", "children, speak, play"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of a playground at a school or daycare center"], "captions_pred_audio": ["a cat meows and a woman speaks", "children are speaking and breathing with background noise "], "question": "which entity is more social", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "dishes cling together then a man begins to speak"], "sample_ids": ["yajyRTUQk3U", "sQGXqGcwOTc"], "start_seconds": ["400", "3"], "properties": ["a woman, something, fried", "cling, speak, dishes"], "captions_pred_video": ["- a woman cooking in the kitchen", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "mechanisms are operating and water is splashing "], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["motors runs briefly and tires screech", "some tunes played by whistling"], "sample_ids": ["yRx9txMcBl0", "u6BnG6YZqJ4"], "start_seconds": ["40", "0"], "properties": ["motors, tires, screech", "tune, play, whistling"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods 
cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "a door opens and closes"], "sample_ids": ["w2bYrCVLT60", "vBHyYJ8pL0"], "start_seconds": ["120", "2"], "properties": ["ducks, speak, quack", "open, close, door"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", null], "captions_pred_audio": ["ducks are quacking and a man is speaking", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is a door?", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["ugHJF0hfYkg", "t97k0cejSQE"], "start_seconds": ["10", "250"], "properties": ["loud, intense, propeller", "sound, chirp, buzz"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a bee on a purple thistle flower"], "captions_pred_audio": ["a helicopter is flying overhead ", "a bee buzzes and a woman speaks"], "question": "which entity is quieter", "label": 1}, {"captions": ["an audience gives applause as a man yells and a group sings", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tdWhHV3X25Q", "vYkA3cfXp5Q"], "start_seconds": ["60", "30"], "properties": ["applause, audience, yells", "engine, accelerate, idle"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "an engine is idling"], 
"question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a clock ticktocks briefly", "birds chirp and objects are moved around"], "sample_ids": ["u7C-AEBQM", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["ticktocks, clock, ticktocks briefly", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a ticktock of a clock", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "wind blowing followed by a zoom"], "sample_ids": ["s4Uz1Ffgo04", "vr8ZXjEBhMQ"], "start_seconds": ["100", "150"], "properties": ["water, rushes, vehicle", "wind, blow, zoom"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to be a video of a vehicle zooming past?", "label": 0}, {"captions": ["an aircraft engine runs as people speak", "water flows as men speak and yell"], "sample_ids": ["wTjoRj1se3U", "vJ7JPEFhyLA"], "start_seconds": ["390", "16"], "properties": ["engine, run, people", "water, flow, men"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a jet engine is running and people are talking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking and yelling?", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["uPDn2BFTHk", "su6FAOcOA8c"], "start_seconds": ["140", "4"], 
"properties": ["lady, laugh, baby", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["a goat bleats as a person speaks", "small dogs yip and bark sharply"], "sample_ids": ["tPJvjq9QePY", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["bleats, person, speak", "bark, yip, sharply"], "captions_pred_video": ["a dog and a sheep in a barn", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a baby cries and a man speaks", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "winds blows roughly as a vehicle races past"], "sample_ids": ["zl9Dqx-j7q4", "xjvTpk2Zpr8"], "start_seconds": ["6", "70"], "properties": ["engine, laugh, loud", "wind, blows, vehicle"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a jet engine roars ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a man speaks as a car is passing by", "men speak and a nozzle sprays liquid"], "sample_ids": ["sK4u5T8hW78", "wRV8yMk886E"], "start_seconds": ["30", "0"], "properties": ["a, car, pass", "liquid, spray, nozzle"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai", "two cars are parked in a parking lot at night"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man speaks followed by a loud burst"], "question": "which entity is about liquid spraying?", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "someone sprays a liquid onto a hard surface making a hiss sound"], "sample_ids": ["zO-LSSY92ZM", "zO-LSSY92ZM"], "start_seconds": ["30", "30"], "properties": ["liquid, surface, sound", "liquid, surface, sound"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", "youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how 
to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'"], "captions_pred_audio": ["steam is hissing and hissing", "steam is hissing and hissing"], "question": "which entity is a liquid?", "label": 0}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a man speaks as a motor runs in the background"], "sample_ids": ["w8uLijTqtlU", "xZepNM9qcRA"], "start_seconds": ["70", "30"], "properties": ["wind, microphone, noise", "background, motor, run"], "captions_pred_video": ["footage is blurry and shaky", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["the wind is blowing strongly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vhJWZheqaE", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["water drains unevenly, toilet flushes, water drains", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["water pouring and bubbling", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["uyRfq-jKPpo", "w5W5Kqtc8E"], "start_seconds": ["50", "100"], "properties": ["water, bubbles, pouring", "wind, blow, vehicle"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube 
how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", null], "captions_pred_audio": ["water is running from a faucet", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a telephone rings and a bird vocalizes", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["skd2PphS6oI", "t97k0cejSQE"], "start_seconds": ["190", "250"], "properties": ["ring, bird, vocalize", "sound, chirp, buzz"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "a bee on a purple thistle flower"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a bee buzzes and a woman speaks"], "question": "which entity has a bird vocalize?", "label": 0}, {"captions": ["white noise and snoring with some rustling in the background", "three men talk while wind blows and some liquid flows"], "sample_ids": ["xzKKf9bKNUo", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["background, noise, snoring", "three men, wind, flow"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, 
{"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "people speak and tapping occurs"], "sample_ids": ["x5cuQjOdM3E", "tFCUUGdREgA"], "start_seconds": ["30", "70"], "properties": ["cat, talk, meow", "people, tap, speak"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a person riding a white horse in an indoor arena"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking and walking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a car accelerates and wind blows", "a man speaks as a car is passing by"], "sample_ids": ["u0TrcHhkPQ", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["accelerates, wind, blows", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which car is moving faster", "label": 0}, {"captions": ["an adult man speaks over glass clinking", "wind blows as people chatter quietly"], "sample_ids": ["u6jIvCtKarQ", "xBxDz0CFVn0"], "start_seconds": ["70", "30"], "properties": ["a, man, speaks", "wind, chatter, people"], "captions_pred_video": ["footage of a person using a blender on a stove top", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 
1}, {"captions": ["wind blows and people scream while an engine revs", "wind blows as people chatter quietly"], "sample_ids": ["w5W5Kqtc8E", "xBxDz0CFVn0"], "start_seconds": ["100", "30"], "properties": ["wind, engine, scream", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["zY3icUyMdh8", "ukg5L09Wpvo"], "start_seconds": ["20", "150"], "properties": ["dog, bark, engine", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["y1saVTXsKwc", "uYT5gxnyMWM"], "start_seconds": ["80", "50"], "properties": ["a, dog, talk", "female, spraying, scream"], "captions_pred_video": ["a dog playing with a pink ball", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a dog barks and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a mechanical buzzing getting louder", "waves crash against a shoreline and people speak"], "sample_ids": ["sEprKHm8Sj8", "yFB25fqfU8I"], "start_seconds": ["90", "300"], "properties": ["noise, loud, buzzing", "wave, crash, shoreline"], "captions_pred_video": ["rally 2012 2013 
2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "people speak softly as food sizzles"], "sample_ids": ["y8dSeubCNI", "yhQ2Lg-7qDY"], "start_seconds": ["4", "130"], "properties": ["engine revving, people speaking, motorcycle", "food, sizzle, speak"], "captions_pred_video": [null, "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["an engine revving and people talking in the background", "a faucet is running and a man is speaking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["ticking continues without interruption", "a frog croaks as other frogs croak in the background"], "sample_ids": ["v-g-j2uTByM", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["ticking, continuous, clock", "background, frog, croak"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "a close up of a frog in the water"], "captions_pred_audio": ["a clock is ticking loudly", "a frog is croaking"], "question": "which entity is not continuous", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xOZfdgAgJ9o", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["woman, whimpering, speaking", "a woman, something, fried"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "- a woman cooking in the kitchen"], 
"captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking while food is frying in the background"], "question": "which woman is speaking", "label": 1}, {"captions": ["a toilet flushes and water drains", "pigeons vocalize and birds chirp"], "sample_ids": ["sfAvvZwdLCY", "uiS58TNyUiw"], "start_seconds": ["20", "430"], "properties": ["water drains, flushes, water", "vocalize, bird, chirp"], "captions_pred_video": ["footage of the toilet in the bathroom", "of the pigeon in the cage"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "people applaud and hoot and chat quietly"], "sample_ids": ["vMf1dLD6Sng", "wwyfGO2J4"], "start_seconds": ["6", "90"], "properties": ["frog, bird, vocalize", "people, applaud, hoot"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", null], "captions_pred_audio": ["a frog croaks loudly", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "a infant makes noise and is excited"], "sample_ids": ["wPz6QRAkEb4", "wIJK3-5y0kA"], "start_seconds": ["60", "30"], "properties": ["chirps, tweets, song", "noise, excited, infant"], "captions_pred_video": ["a bird in a cage on top of a pole", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["birds are chirping in the background ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "an engine works in idle nearby followed by a man talking"], "sample_ids": ["weDbePuc-Xc", "wqADXCzngMw"], "start_seconds": ["40", "340"], "properties": ["cartoon character, music, vocalize", "engine, idle, man"], "captions_pred_video": ["a cartoon frog and 
a butterfly are sitting on the ground next to each other", "of a man working on a vintage volkswagen beetle"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a lawn mower is running and a man is speaking "], "question": "which entity is a video of a man talking?", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["zcDwZ6W7E3E", "uEU-Hg5MTN8"], "start_seconds": ["180", "27"], "properties": ["man, speak, motorcycles", "animal, grunts, snorts"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a duck quacks continuously", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vh30P49Po6s", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["quacks, continuously, duck", "music, gunfire, explosion"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a duck is quacking loudly", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a scene from a movie", "label": 1}, {"captions": ["continuous snoring", "a car speeding up in the distance"], "sample_ids": ["sLkeqCDJIyw", "u0TrcHhkPQ"], "start_seconds": ["120", "20"], "properties": ["loud, snoring, noise", "distance, car, speed"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", null], "captions_pred_audio": ["a person is snoring loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["people clap and speak in the distance", "vehicle tires screech and a man speaks before a car door opens"], "sample_ids": ["wwyfGO2J4", "sxYkFKFIZD0"], "start_seconds": ["90", "20"], "properties": ["clap, distance, speak", "screech, man, door"], "captions_pred_video": [null, "2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking while a car is revving and accelerating with a squeal in the background "], "question": "which entity is about a car door opening?", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a frog croaks as other frogs croak in the background"], "sample_ids": ["xKB8O8LTs6s", "yswmmRZFItk"], "start_seconds": ["70", "0"], "properties": ["music, gunfire, explosion", "background, frog, croak"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a close up of a frog in the water"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a frog is croaking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "someone is typing on a computer keyboard"], "sample_ids": ["uYT5gxnyMWM", "v0x1odnXtP0"], "start_seconds": ["50", "210"], "properties": ["a, scream, girl", "keyboard, type, computer"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "how to make money 
on youtube in spanish"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["someone snores nearby", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["spJCm8tD9Zo", "wz7N8YRy74I"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "rooster, crow, background, men"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["an insect buzzes around continuously", "a duck quacks continuously"], "sample_ids": ["v25l1jef3JY", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["buzzes, continuously, insect", "quacks, continuously, duck"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 1}, {"captions": ["electronic beeps occur in a short series", "someone whistles a tune"], "sample_ids": ["y682ml90jGw", "sIXTftIuUgw"], "start_seconds": ["11", "90"], "properties": ["beeps, series, electronic", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a person whistling a song"], "question": "which is not a musical instrument", "label": 0}, {"captions": ["a train horn sounds as a railroad passing bell rings", "a car accelerates and wind blows"], "sample_ids": ["zgUgkpk78xU", "u0TrcHhkPQ"], "start_seconds": ["70", "20"], "properties": ["horn, bell, train", "accelerates, wind, blows"], 
"captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks as horns blow", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["tHyNqRyK34A", "tDVADusiIoc"], "start_seconds": ["24", "60"], "properties": ["a, man, speaks", "water, radio, man"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sOa7g-44Dag", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["background, man, spray", "stream, water, flow"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a man speaks followed by another man speaking outside"], "sample_ids": ["sG7TyPnFDR0", "viuTg1M-dqg"], "start_seconds": ["180", "30"], "properties": ["beeps, 
machine, smoke alarm", "two men, speak, follow"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking while a machine runs?", "label": 0}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["uEU-Hg5MTN8", "tiDFTC-5vU"], "start_seconds": ["27", "30"], "properties": ["animal, grunts, snorts", "male, duck, laugh"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["loud, continuous burping", "a person snores loudly multiple times at a close distance"], "sample_ids": ["y636gklDioE", "sSMl2vc3ek"], "start_seconds": ["20", "20"], "properties": ["loud, continuous, burping", "loud, multiple, distance"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", null], "captions_pred_audio": ["a person burps loudly several times", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "wind blows as people chatter quietly"], "sample_ids": ["yaln9y8I7ms", "xBxDz0CFVn0"], "start_seconds": ["230", "30"], "properties": ["female, flushes, toilet", "wind, chatter, people"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and out of focus"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["male speech followed by light wind, 
rustling, distant speech and brief hissing", "water runs from a faucet while some men speak and the water runs in the sink"], "sample_ids": ["w9lpbUn0hPc", "vzceMbklWc"], "start_seconds": ["30", "180"], "properties": ["male, wind, rustling", "water, faucet, sink"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", null], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "water is running and a man is speaking"], "question": "which entity is a video of water running?", "label": 1}, {"captions": ["people clap and speak in the distance", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wwyfGO2J4", "vb1fPSDI4c"], "start_seconds": ["90", "30"], "properties": ["clap, distance, speak", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zhx6hoYrHeI", "wqZ135Ssz0"], "start_seconds": ["160", "60"], "properties": ["engine, sputter, rough", "two men, woman, birds"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is talking", "label": 1}, {"captions": ["a clock ticktocks in wind", "small dogs yip and bark sharply"], "sample_ids": ["yVumC9TGknc", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["ticktocks, clock, wind", "bark, yip, sharply"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage is blurry and shaky, making it 
difficult to see what is happening"], "captions_pred_audio": ["a series of beeps and chirps", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "a motorcycle idles loudly as wind blows"], "sample_ids": ["sOa7g-44Dag", "v7jJS8aAyA"], "start_seconds": ["30", "10"], "properties": ["background, man, spray", "wind, blows, loudly"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", null], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a motorcycle engine is idling and vibrating"], "question": "which entity is louder", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "a car speeding up in the distance"], "sample_ids": ["tDlysoZiA1I", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["animal, grunt, multiple", "distance, car, speed"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", null], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a stream of water runs briefly"], "sample_ids": ["wRBHTgrbiwg", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["bird, owl, speak", "stream, water, run"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "water quietly rushes by while birds chirp in the background"], "sample_ids": ["y8dSeubCNI", "sYITalLZjj4"], "start_seconds": ["4", "30"], 
"properties": ["engine revving, people speaking, motorcycle", "water, rushes, background, birds"], "captions_pred_video": [null, "two ducks are swimming in the water near each other"], "captions_pred_audio": ["an engine revving and people talking in the background", "wind blows and birds chirp"], "question": "which entity is quieter", "label": 1}, {"captions": ["a person is snoring while sleeping", "people cheer as a vehicle engine revs"], "sample_ids": ["vJrjSeP17yE", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["a person is sleeping, snoring, person", "engine revs, vehicle, people"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a person snoring loudly", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a woman speaks with water running", "a horn blasts as warning bells ring"], "sample_ids": ["wTideSjRFS0", "zgUgkpk78xU"], "start_seconds": ["30", "70"], "properties": ["water, running, woman", "horn, bells, ring"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a train blows its horn as it speeds down the tracks "], "question": "which entity is a warning", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "water pouring and bubbling"], "sample_ids": ["soTOh3zYJfY", "uyRfq-jKPpo"], "start_seconds": ["40", "50"], "properties": ["vehicle, skid, tires", "water, bubbles, 
pouring"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "a man speaking with light rustling"], "sample_ids": ["y1saVTXsKwc", "zOZleIRqZm4"], "start_seconds": ["80", "80"], "properties": ["a, dog, talk", "light, rustling, man"], "captions_pred_video": ["a dog playing with a pink ball", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a dog barks and a man speaks", "a man is speaking with crickets chirping in the background"], "question": "which entity is a man speaking with light rustling?", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tDVADusiIoc", "uYT5gxnyMWM"], "start_seconds": ["60", "50"], "properties": ["man, radio, blows", "female, spraying, scream"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking while the 
wind is blowing and water is splashing", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a man speaks as a car is passing by"], "sample_ids": ["uWPRNLnpy7Y", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["accelerate, laugh, vehicle", "a, car, pass"], "captions_pred_video": ["is taken from a car driving down the street", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which vehicle is moving faster", "label": 0}, {"captions": ["a man talks while vehicles pass by", "winds blows roughly as a vehicle races past"], "sample_ids": ["sK4u5T8hW78", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["a, man, talk", "wind, blows, vehicle"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a jet engine roars and wind blows "], "question": "which entity is about a vehicle racing 
past?", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["uYT5gxnyMWM", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["person, spray, yell", "a woman, something, fried"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a large crowd cheers and applauds", "pigeons vocalize and birds chirp"], "sample_ids": ["rqfQRErjfk8", "uiS58TNyUiw"], "start_seconds": ["170", "430"], "properties": ["crowd, cheers, applauds", "vocalize, bird, chirp"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "of the pigeon in the cage"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a man is speaking and a bee is buzzing"], "question": "which entity is not a crowd?", "label": 1}, {"captions": ["a goat screams and people speak in the background", "vehicles pass by on a roadway"], "sample_ids": ["xC8kbrKJmco", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["background, goat, scream", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a goat is bleating ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "paper is crumpling consistently"], "sample_ids": ["yVumC9TGknc", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["humming, clock, birds", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of the person holding a pair 
of scissors"], "captions_pred_audio": ["a series of beeps and chirps", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a man speaks as a car is passing by"], "sample_ids": ["wRBHTgrbiwg", "sK4u5T8hW78"], "start_seconds": ["50", "30"], "properties": ["bird, owl, speak", "a, car, pass"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking briefly?", "label": 0}, {"captions": ["distant humming of an engine", "small dogs yip and bark sharply"], "sample_ids": ["yVPZ2MNWpms", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["sound, distance, engine", "bark, yip, sharply"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a car is driving by on the road ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a vehicle engine runs while a siren and horn sound", "an audience gives applause"], "sample_ids": ["u--KhUW8l1Y", "x6iCUDmRpKQ"], "start_seconds": ["0", "38"], "properties": ["engine, sound, horn", "applause, audience, give"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a black background with the moon and stars 
in the sky"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a group of people are clapping and cheering"], "question": "which entity is a response to a stimulus", "label": 1}, {"captions": ["water flows and trickles", "a stream of water runs briefly"], "sample_ids": ["tB7hWb9gTuQ", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["water, flow, trickle", "stream, water, run"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["water is splashing and gurgling", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["wind blows strongly", "a horn rings out as a machine runs by"], "sample_ids": ["w8uLijTqtlU", "slZLHwNbbt4"], "start_seconds": ["70", "300"], "properties": ["wind, blows, strongly", "a, horn, run"], "captions_pred_video": ["footage is blurry and shaky", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["the wind is blowing strongly", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is moving", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "a car accelerates and wind blows"], "sample_ids": ["uPDn2BFTHk", "u0TrcHhkPQ"], "start_seconds": ["140", "20"], "properties": ["woman, laughs, speaks", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["women speak and laugh as wind blows", "music plays, a person speaks, followed by whooshes and a ding"], "sample_ids": ["un9VQlzgZM", "tQWGZLItBXk"], "start_seconds": ["5", "170"], "properties": ["wind, speak, laugh", "music, person, ding"], "captions_pred_video": [null, "worms revolution 
screenshots"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity has a person speaking?", "label": 1}, {"captions": ["food is frying then a woman speaks", "a horn rings out as a machine runs by"], "sample_ids": ["ukxt9I7eMMg", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["food, woman, speak", "a, horn, run"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["tjmoSi330GM", "ukg5L09Wpvo"], "start_seconds": ["23", "150"], "properties": ["speed, water, boat", "clickety-clack, train, whistle"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a train blows its whistle and blows its horn "], "question": "which entity is moving at a slower speed", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "wind blows as people chatter quietly"], "sample_ids": ["zgUgkpk78xU", "xBxDz0CFVn0"], "start_seconds": ["70", "30"], "properties": ["horn, bells, ring", "wind, chatter, people"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 
1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage is blurry and out of focus"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a toilet flushes and a female speaks"], "sample_ids": ["w2M4i1mklOA", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["loud, chime, bell", "female, flushes, toilet"], "captions_pred_video": ["footage of an antique clock", "footage is blurry and out of focus"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a toilet flushes and a man speaks"], "question": "which entity is silent", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "a stream of water runs briefly"], "sample_ids": ["vSeGhaZt-aI", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["water, bubbles, speak", "stream, water, run"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vddP56-ogds", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["water, splash, person, laugh", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking nearby?", "label": 
0}, {"captions": ["an engine works in idle nearby followed by a man talking", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wqADXCzngMw", "su6FAOcOA8c"], "start_seconds": ["340", "4"], "properties": ["engine, idle, man", "engine, idle, woman"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a woman is speaking and a subway train is moving "], "question": "which entity has a man talking to an engine?", "label": 0}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "people applaud and hoot and chat quietly"], "sample_ids": ["sQGXqGcwOTc", "wwyfGO2J4"], "start_seconds": ["3", "90"], "properties": ["audio, kid, giggles", "people, applaud, hoot"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", null], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone is snoring while sleeping", "a child speaks in closed space"], "sample_ids": ["ujMt0-D-x2k", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["snore, sleep, someone", "child, space, speak"], "captions_pred_video": ["of the dog playing with a toy on the floor", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["u2f5NpsoHBg", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["person, laugh, clap", "rooster, crow, 
background, men"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity shows a person speaking?", "label": 0}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a man speaks as a vehicles passes by then a woman speaks"], "sample_ids": ["ylpYOorfH4o", "siJFXfGWgDk"], "start_seconds": ["410", "50"], "properties": ["engine, run, loud", "man, woman, vehicle"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking and birds are chirping in the background "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["slZLHwNbbt4", "uZesmtKZGSw"], "start_seconds": ["300", "250"], "properties": ["train, horn, sound", "men, talk, cars"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 
1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks over intermittent keyboard taps", "a woman speaks as she rubs two objects together"], "sample_ids": ["tw76HGONaKg", "vzxHnu-SFEw"], "start_seconds": ["570", "80"], "properties": ["audio, man, keyboard", "two objects, woman, speak"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with 
scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "a man speaks as a boat engine runs"], "sample_ids": ["zhx6hoYrHeI", "wtDqrBygTcU"], "start_seconds": ["160", "30"], "properties": ["engine, sputter, rough", "man, engine, run"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "shows a person riding on the back of a boat as it speeds through the water"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and a motor is running"], "question": "which entity is a boat?", "label": 1}, {"captions": ["a woman and man are speaking", "some men converse over an engine running"], "sample_ids": ["vbpKkWvfOu4", "sCiy7QS1U"], "start_seconds": ["560", "300"], "properties": ["two people, speaking, woman, man", "men, converse, engine"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and 
a man is speaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows two people speaking", "label": 0}, {"captions": ["a child speaks in closed space", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["yW6FWLSLkx4", "tiDFTC-5vU"], "start_seconds": ["40", "30"], "properties": ["child, space, speak", "male, duck, laugh"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", null], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["a stream of water runs briefly", "a man speaks as a car is passing by"], "sample_ids": ["x-PeY8Yb8M4", "sK4u5T8hW78"], "start_seconds": ["300", "30"], "properties": ["stream, water, run", "a, car, pass"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks while water drains", "paper folding and crinkling"], "sample_ids": ["vSeGhaZt-aI", "zPpG3RD8lSs"], "start_seconds": ["50", "20"], "properties": ["water, drain, man", "paper, fold, crinkle"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece 
of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of a process", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "people speak softly as food sizzles"], "sample_ids": ["yswmmRZFItk", "yhQ2Lg-7qDY"], "start_seconds": ["0", "130"], "properties": ["background, frog, croak", "food, sizzle, speak"], "captions_pred_video": ["a close up of a frog in the water", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a frog is croaking", "a faucet is running and a man is speaking"], "question": "which entity is silent", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a child speaks in closed space"], "sample_ids": ["xC8kbrKJmco", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["background, goat, scream", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a goat is bleating ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["an engine 
works in idle nearby followed by a man talking", "heavy rain splashes as it falls"], "sample_ids": ["wqADXCzngMw", "wP8ZKrlx3oA"], "start_seconds": ["340", "40"], "properties": ["engine, idle, man", "fall, rain, splash"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a heavy rain is falling on a surface"], "question": "which entity is a liquid", "label": 1}, {"captions": ["some clanking with distant murmuring", "three men talk while wind blows and some liquid flows"], "sample_ids": ["uMTTDZ2mb4", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["clanking, murmuring, distant", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "a woman speaks as she rubs two objects together"], "sample_ids": ["vVhthZ45k3Y", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["cat, purr, hiss", "two objects, woman, speak"], "captions_pred_video": ["footage is blurry and out of focus", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "someone snores nearby"], "sample_ids": ["wztCSUxOf8", "spJCm8tD9Zo"], "start_seconds": ["130", "90"], "properties": ["a crowd, yells, applauds", "someone snores, nearby, someone"], "captions_pred_video": [null, "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a baby cries and a woman speaks"], "sample_ids": ["tDVADusiIoc", "tMbMDvT50j8"], "start_seconds": ["60", "12"], "properties": ["water, radio, man", "a, cry, woman"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a baby cries and a woman speaks"], "question": "which entity is a human speaking?", "label": 0}, {"captions": ["a beep occurs briefly", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["xtWeJ56-U-g", "ukg5L09Wpvo"], "start_seconds": ["20", "150"], "properties": ["beep, occur, briefly", "a train, a horn, a bell"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a train blows its whistle and blows its horn "], "question": "which entity is a warning", "label": 1}, {"captions": ["water runs into a sink while men speak", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vzceMbklWc", "tiDFTC-5vU"], "start_seconds": ["180", "30"], "properties": ["water, sink, run", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and a man is speaking", "a man is speaking and ducks are quacking"], "question": "which entity has a duck?", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "pigeons vocalize and birds chirp"], "sample_ids": ["uiS58TNyUiw", "uiS58TNyUiw"], "start_seconds": ["430", "430"], "properties": ["vocalize, bird, chirp", "vocalize, bird, chirp"], "captions_pred_video": ["of the pigeon in the cage", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "gunshots ring out, a man yells, and more shots follow"], "sample_ids": ["vZAw4apG0Es", "vKrYfzleLB8"], "start_seconds": ["30", "110"], "properties": ["people, clock, converse", "a, ring, gunshots"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "stock footage of a person holding a gun in their hand"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking with background noise and a cap gun is fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a man speaks over a radio as wind blows 
and water splashes", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["tDVADusiIoc", "y8WEcpOlT3I"], "start_seconds": ["60", "40"], "properties": ["water, radio, man", "harsh, wind, blows"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking with wind noise in the background "], "question": "which entity has a man speaking over a radio as wind blows and water splashes?", "label": 0}, {"captions": ["an small aircraft engine runs and a boy speaks", "some tunes played by whistling"], "sample_ids": ["xSKJGCItUWE", "u6BnG6YZqJ4"], "start_seconds": ["10", "0"], "properties": ["engine, run, boy", "tune, play, whistling"], "captions_pred_video": ["footage of the helicopter flying in the room", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["scraping and female speech with distant music", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["yHeVV-xeOxQ", "ziUT9IFTkjg"], "start_seconds": ["130", "10"], "properties": ["female, speech, music", "background, birds, rustling"], "captions_pred_video": ["of a girl milking a goat's udder", null], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "some men converse over an engine running"], "sample_ids": ["uWPRNLnpy7Y", "sCiy7QS1U"], "start_seconds": ["10", "300"], "properties": ["accelerate, laugh, vehicle", "men, converse, engine"], "captions_pred_video": 
["is taken from a car driving down the street", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a dog barks and whimpers", "a clock ticktocks"], "sample_ids": ["sShpyu2l4YQ", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "ticktocks, clock, ticktocks"], "captions_pred_video": ["the puppies are playing with a toy", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a dog is barking and growling", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks as horns blow", "children cheer as a man speaks then an audience screams"], "sample_ids": ["tHyNqRyK34A", "vJvryTwuAV8"], "start_seconds": ["24", "16"], "properties": ["a, man, speaks", "audience, cheer, man"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a man is speaking and a crowd is shouting and whooping "], "question": "which man is speaking", "label": 1}, {"captions": ["an adult woman and an adult man speak", "an airplane accelerates briefly"], "sample_ids": ["zTLVJCo4WEE", "zjTG0gaGCUI"], "start_seconds": ["30", "80"], "properties": ["two people, adult, speak", "accelerates, airplane, briefly"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and crickets chirp", "a jet engine roars as wind blows "], "question": "which is not a person", "label": 1}, {"captions": ["an audience gives applause", "wind blows as people chatter quietly"], "sample_ids": ["x6iCUDmRpKQ", "xBxDz0CFVn0"], "start_seconds": ["38", "30"], "properties": ["applause, audience, give", "wind, 
chatter, people"], "captions_pred_video": ["a black background with the moon and stars in the sky", "footage is blurry and out of focus"], "captions_pred_audio": ["a group of people are clapping and cheering", "a man is speaking with wind noise in the background "], "question": "which is quieter", "label": 0}, {"captions": ["females talk and laugh over gusting wind", "a motor idles, accelerates, then slows down."], "sample_ids": ["un9VQlzgZM", "vYkA3cfXp5Q"], "start_seconds": ["5", "30"], "properties": ["females, talk, laugh", "speed, idle, accelerate"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "an engine is idling"], "question": "which entity is not a person", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "a telephone rings followed by a woman talking"], "sample_ids": ["sQGXqGcwOTc", "tGcFnX0GHI"], "start_seconds": ["3", "0"], "properties": ["audio, kid, giggles", "ring, talk, woman"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", null], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["children speak and play together", "a stream of water flows as people talk and wind blows"], "sample_ids": ["yVVP8XvWJTo", "xBxDz0CFVn0"], "start_seconds": ["260", "30"], "properties": ["children, speak, play", "stream, water, flow"], "captions_pred_video": ["footage of a playground at a school or daycare center", "footage is blurry and out of focus"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a toilet flushes 
and water drains unevenly", "vehicles pass by on a roadway"], "sample_ids": ["vhJWZheqaE", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["water drains unevenly, toilet flushes, water drains", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a toilet is flushed", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["w8uLijTqtlU", "sapQIQUhFc"], "start_seconds": ["70", "280"], "properties": ["wind, microphone, noise", "liquid, flow, distance"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking and a stream is flowing in the background "], "question": "which entity is more distant", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "an infant crying frantically"], "sample_ids": ["u21-Z5gJCB8", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["background, voice, man", "cry, infant, frantically"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a helicopter engine idles continuously", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["ugHJF0hfYkg", "sLUnaPT5gM8"], "start_seconds": ["10", "0"], "properties": ["engine, idle, continuously", "loud, laughter, intermittent"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a helicopter is flying overhead ", "a baby is 
laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a male speaks and another male speaks", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["viuTg1M-dqg", "s7knHCFW82w"], "start_seconds": ["30", "30"], "properties": ["two males, speaking, male", "blow horn, get close, train"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a train is blowing its horn and its wheels are squealing "], "question": "which is a train", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "several insects fly while two men talk"], "sample_ids": ["zcDwZ6W7E3E", "s-T9OVOiMLo"], "start_seconds": ["180", "330"], "properties": ["a, man, speak", "several, fly, men"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more insects", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tIY7qOV3rEM", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "applause, audience, yells"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking and a crowd is clapping"], "question": "which 
entity is a performance", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["y8dSeubCNI", "tdWhHV3X25Q"], "start_seconds": ["4", "60"], "properties": ["men, women, car", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["an engine revving and people talking in the background", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "birds coo incessantly"], "sample_ids": ["s4Uz1Ffgo04", "yZrFNS7GFBQ"], "start_seconds": ["100", "30"], "properties": ["roars, background, people speaking", "coo, bird, incessant"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "of the bird in the cage"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "an owl hoots in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "someone whistles a tune"], "sample_ids": ["s59PfAghdkM", "sIXTftIuUgw"], "start_seconds": ["0", "90"], "properties": ["bird, chirp, background, horse, neigh", "someone, tune, whistle"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", null], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["material crumbles into a microphone", "a man speaks over intermittent keyboard taps"], "sample_ids": ["vofpvUo6NAw", "tw76HGONaKg"], "start_seconds": ["220", "570"], "properties": ["material, 
crumbles, microphone", "audio, man, keyboard"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a man speaks and types on a computer keyboard "], "question": "which entity is a video", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "water flows and trickles"], "sample_ids": ["wvKpEYswXO0", "tB7hWb9gTuQ"], "start_seconds": ["150", "30"], "properties": ["sound, water, running", "water, flow, trickle"], "captions_pred_video": ["of the person preparing food in the kitchen", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "water is splashing and gurgling"], "question": "which entity has water flowing and trickling?", "label": 1}, {"captions": ["a door slams shut roughly", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zkKdxzNC97Y", "vfYTJq7nU"], "start_seconds": ["27", "130"], 
"properties": ["a door, slams, shut", "rustling, ducks, quack"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a duck quacks and a woman speaks"], "question": "which entity is more likely to be a door", "label": 0}, {"captions": ["a man speaks over intermittent keyboard taps", "pigeons vocalize and birds chirp"], "sample_ids": ["tw76HGONaKg", "uiS58TNyUiw"], "start_seconds": ["570", "430"], "properties": ["audio, man, keyboard", "vocalize, bird, chirp"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "of the pigeon in the cage"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["wind blows as people chatter quietly", "an insect buzzes around continuously"], "sample_ids": ["xBxDz0CFVn0", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["wind, chatter, people", "buzzes, continuously, insect"], "captions_pred_video": ["footage is blurry and out of focus", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a 
man is speaking with wind noise in the background ", "a fly is buzzing around a microphone "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a stream of water flows as people talk and wind blows", "winds blows roughly as a vehicle races past"], "sample_ids": ["xBxDz0CFVn0", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["stream, water, flow", "wind, blows, vehicle"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a stream", "label": 0}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "a duck quacks continuously"], "sample_ids": ["w9lpbUn0hPc", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["male, wind, rustling", "quacks, continuously, duck"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "a propeller rotates loudly and intensely"], "sample_ids": ["tQWGZLItBXk", "ugHJF0hfYkg"], "start_seconds": ["170", "10"], "properties": ["music, person, ding", "loud, intense, propeller"], "captions_pred_video": ["worms revolution screenshots", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a helicopter engine runs", "a man speaks as a motor runs in 
the background"], "sample_ids": ["t5ZbXbniOWk", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["engine, helicopter, run", "background, motor, run"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "roadway noise occurs and a truck accelerates"], "sample_ids": ["yajyRTUQk3U", "tgbONvsP47Y"], "start_seconds": ["400", "0"], "properties": ["noise, woman, speak", "noise, truck, accelerate"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a car is driving on the road "], "question": "which noise is caused by a truck", "label": 1}, {"captions": ["a motor runs and stops, and animals squawk and croak", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["s4tUs779vBA", "tw76HGONaKg"], "start_seconds": ["160", "570"], "properties": ["a, sound, stop", "A, game, keyboard"], "captions_pred_video": ["game in 10 words or less - screenshot 1", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of 
zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a car is revving and a man is speaking ", "a man speaks and types on a computer keyboard "], "question": "which entity has a keyboard?", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "wind blowing followed by a zoom"], "sample_ids": ["vzxHnu-SFEw", "vr8ZXjEBhMQ"], "start_seconds": ["80", "150"], "properties": ["two objects, woman, speak", "wind, blow, zoom"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "a propeller rotates loudly and intensely"], "sample_ids": ["wnpJndXuxLc", "ugHJF0hfYkg"], "start_seconds": ["50", 
"10"], "properties": ["beeps, loud, whistle", "loud, intense, propeller"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["an engine runs and a man speaks", "a weapon fires multiple times"], "sample_ids": ["yT5WfYMRr-U", "sMC07Ucy7kg"], "start_seconds": ["30", "10"], "properties": ["engine, run, man", "weapon, fire, multiple"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage is from a car's point of view"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["leaves rustle while man speaks", "a vehicle accelerates and squeals tires"], "sample_ids": ["zOZleIRqZm4", "yRx9txMcBl0"], "start_seconds": ["80", "40"], "properties": ["leaves, rustle, speak", "accelerates, tires, squeals"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a car is revving its engine and skidding "], "question": 
"which entity is a vehicle", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a car speeding up in the distance"], "sample_ids": ["wtDqrBygTcU", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["man, engine, run", "distance, car, speed"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", null], "captions_pred_audio": ["a man is speaking and a motor is running", "a race car accelerates and revs its engine "], "question": "which object is moving faster", "label": 0}, {"captions": ["a man speaks as a machine runs", "some men converse over an engine running"], "sample_ids": ["vD6lYD1l0BY", "sCiy7QS1U"], "start_seconds": ["330", "300"], "properties": ["a, machine, run", "men, converse, engine"], "captions_pred_video": ["game controller being held in the hands of the person", null], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a man speaking as a machine runs?", "label": 0}, {"captions": ["birds chirp quietly and an adult man speaks", "a machine clanks and thumps and a male speaks"], "sample_ids": ["zuua6-5goWw", "sWZzXuWYY"], "start_seconds": ["30", "420"], "properties": ["birds, chirp, quiet, man, speaks", "male, clanks, thumps"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", null], "captions_pred_audio": 
["birds are chirping and a man is speaking with background noise ", "a sewing machine runs and a man speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["music plays followed by gunshots and then an explosion", "people applaud and hoot and chat quietly"], "sample_ids": ["xKB8O8LTs6s", "wwyfGO2J4"], "start_seconds": ["70", "90"], "properties": ["music, gunshots, explosion", "people, applaud, hoot"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "people are clapping and speaking with background noise "], "question": "which entity is more peaceful", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "water flows and trickles"], "sample_ids": ["ugHJF0hfYkg", "tB7hWb9gTuQ"], "start_seconds": ["10", "30"], "properties": ["loud, intense, propeller", "water, flow, trickle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a helicopter is flying overhead ", "water is splashing and gurgling"], "question": "which entity is more quiet", "label": 1}, {"captions": ["male speech with light ticking", "an airplane engine spools and people speak"], "sample_ids": ["xO-Q2BlIIPU", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["male, speech, ticking", "airplane, engine, spool"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a jet engine is running and people are talking"], "question": "which entity is a video", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a man speaks over intermittent keyboard taps"], 
"sample_ids": ["sWZzXuWYY", "tw76HGONaKg"], "start_seconds": ["420", "570"], "properties": ["male, clanks, thumps", "audio, man, keyboard"], "captions_pred_video": [null, "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a person snores loudly multiple times at a close distance"], "sample_ids": ["uzQnlJXBbOM", "sSMl2vc3ek"], "start_seconds": ["50", "20"], "properties": ["ringing, beep, stop", "loud, multiple, distance"], "captions_pred_video": ["footage of a person using a cell phone on a table", null], "captions_pred_audio": ["a telephone rings and a man speaks", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["someone is snoring while sleeping", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["ujMt0-D-x2k", "sLUnaPT5gM8"], "start_seconds": ["0", "0"], "properties": ["snore, sleep, someone", "loud, laughter, intermittent"], 
"captions_pred_video": ["of the dog playing with a toy on the floor", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a person is snoring loudly", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tQWGZLItBXk", "uYT5gxnyMWM"], "start_seconds": ["170", "50"], "properties": ["music, person, ding", "female, spraying, scream"], "captions_pred_video": ["worms revolution screenshots", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman is speaking and a baby is crying"], "question": "which entity has a person speaking?", "label": 0}, {"captions": ["a machine engine runs and a man speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vs65y4qmyBE", "su6FAOcOA8c"], "start_seconds": ["340", "4"], "properties": ["engine, run, man", "engine, idle, woman"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking and a subway train is moving "], "question": "which entity has a man speaking while an engine runs?", "label": 0}, {"captions": ["water gurgles, metal squeaks and the water stops", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["x4a9YGIw4ok", "xKB8O8LTs6s"], "start_seconds": ["120", "70"], "properties": ["water, gurgles, stops", "music, gunfire, explosion"], "captions_pred_video": ["footage is blurry and out of focus", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a toilet flushes and 
water splashes", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "water is sprayed across a hard surface"], "sample_ids": ["xM4joTqDVp4", "sQwlkXjQabo"], "start_seconds": ["160", "10"], "properties": ["background, chirp, birds", "water, spray, surface"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["birds are chirping and a train is moving ", "spraying followed by silence"], "question": "which entity is a video of a train chugging?", "label": 0}, {"captions": ["a man speaking with light rustling", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zOZleIRqZm4", "wz7N8YRy74I"], "start_seconds": ["80", "30"], "properties": ["light, rustling, man", "rooster, crow, background, men"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["x5cuQjOdM3E", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["cat, meows, young woman", "A, game, keyboard"], "captions_pred_video": ["a black background with an airplane flying in the sky", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina 
of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man speaks and types on a computer keyboard "], "question": "which entity is playing a video game", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "wind blows strongly and a young man speaks"], "sample_ids": ["xBxDz0CFVn0", "vs65y4qmyBE"], "start_seconds": ["30", "340"], "properties": ["stream, water, flow", "wind, blows, strongly"], "captions_pred_video": ["footage is blurry and out of focus", "a car is engulfed in flames on the side of the road"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a heavy engine is running and men are speaking "], "question": "which entity is a stream of water flowing as people talk and wind blows?", "label": 0}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "people speak as gunfire rings out"], "sample_ids": ["xyL9F5VrjkE", "wqTCwqVRDlk"], "start_seconds": ["20", "80"], "properties": ["wind, motor, distance", "gunfire, ring, speak"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "a large crowd cheers and 
applauds"], "sample_ids": ["y1saVTXsKwc", "rqfQRErjfk8"], "start_seconds": ["80", "170"], "properties": ["a, dog, talk", "crowd, cheers, applauds"], "captions_pred_video": ["a dog playing with a pink ball", "a man hugging another man in front of an orchestra"], "captions_pred_audio": ["a dog barks and a man speaks", "a crowd of people clapping and cheering"], "question": "which entity is more active", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vZAw4apG0Es", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["people, clock, converse", "loud, laughter, intermittent"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a clock is ticking and people are talking", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["rustling with distant murmuring", "a car speeding up in the distance"], "sample_ids": ["wnNNcxAPwGQ", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["sound, distance, rustling", "distance, car, speed"], "captions_pred_video": ["footage of a yellow truck doing a burnout on a race track", null], "captions_pred_audio": ["a crowd of people are talking and laughing while a skateboard rolls by ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["an engine runs loudly", "a clock ticktocks"], "sample_ids": ["vqZuVbG6-HI", "v-g-j2uTByM"], "start_seconds": ["130", "30"], "properties": ["loud, engine, run", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage is blurry because it's raining outside", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a clock is ticking loudly"], 
"question": "which entity is quieter", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wTjoRj1se3U", "ukg5L09Wpvo"], "start_seconds": ["390", "150"], "properties": ["engine, run, people", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a jet engine is running and people are talking", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a car speeding up in the distance", "a clock ticktocks"], "sample_ids": ["u0TrcHhkPQ", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["distance, car, speed", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["paper is crumpling consistently", "a toilet flushes and a female speaks"], "sample_ids": ["v5cSxLaHADY", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "female, flushes, toilet"], "captions_pred_video": ["footage of the person holding a pair of scissors", "footage is blurry and out of focus"], "captions_pred_audio": ["paper is crumpled and crinkled", "a toilet flushes and a man speaks"], "question": "which entity is a video", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a man speaks followed by another man speaking outside"], "sample_ids": ["tEE3MpBt1sg", "viuTg1M-dqg"], "start_seconds": ["50", "30"], "properties": ["drill, something, laugh", "two men, speak, follow"], "captions_pred_video": 
["footage is blurry due to the smoke in the air", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two men speaking?", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tQWGZLItBXk", "uZesmtKZGSw"], "start_seconds": ["170", "250"], "properties": ["music, person, ding", "men, talk, cars"], "captions_pred_video": ["worms revolution screenshots", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "winds blows roughly as a vehicle races past"], "sample_ids": ["sDSppXIlJrs", "xjvTpk2Zpr8"], "start_seconds": ["27", "70"], "properties": ["microphone, water, wind", "wind, blows, vehicle"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a jet engine roars and wind blows "], "question": "which entity is a 
vehicle?", "label": 1}, {"captions": ["a low rumbling in the distance followed by a motorcycle engine revving up", "people speak in the background as a clock ticktocks"], "sample_ids": ["vr8ZXjEBhMQ", "vZAw4apG0Es"], "start_seconds": ["150", "30"], "properties": ["sound, distance, engine", "background, clock, ticktocks"], "captions_pred_video": ["is taken from a motorcycle's point of view", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a clock is ticking and people are talking"], "question": "which entity has a clock ticking in the background?", "label": 1}, {"captions": ["frogs croak and vocalize", "women speak and laugh as wind blows"], "sample_ids": ["yswmmRZFItk", "un9VQlzgZM"], "start_seconds": ["0", "5"], "properties": ["croak, vocalize, frog", "wind, speak, laugh"], "captions_pred_video": ["a close up of a frog in the water", null], "captions_pred_audio": ["a frog is croaking", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is a human", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vBslzh7saPw", "vJ7JPEFhyLA"], "start_seconds": ["90", "16"], "properties": ["power, scream, increase", "three men, wind, flow"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "an airplane engine spools and people speak"], "sample_ids": ["wTideSjRFS0", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["food, sizzle, woman", "airplane, engine, spool"], "captions_pred_video": ["footage of 
a woman cooking in a kitchen with a microwave oven", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a jet engine is running and people are talking"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a toilet flushes and water drains", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sfAvvZwdLCY", "vb1fPSDI4c"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "multiple, people, yell"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a crowd of people are talking and laughing"], "question": "which entity has more water", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["w5W5Kqtc8E", "wDVMhEdTiVw"], "start_seconds": ["100", "30"], "properties": ["wind, engine, scream", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "water splashes as an animal walks through"], "sample_ids": ["vbpKkWvfOu4", "w1ir-sZ3Im8"], "start_seconds": ["560", "90"], "properties": ["a, man, speaks", "animal, water, splashes"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman 
is speaking and a man is speaking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["speaking following by laughing and clapping", "small dogs yip and bark sharply"], "sample_ids": ["u2f5NpsoHBg", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["person, laugh, clap", "bark, yip, sharply"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a weapon fires multiple times", "a machine beeps continuously"], "sample_ids": ["sMC07Ucy7kg", "y682ml90jGw"], "start_seconds": ["10", "11"], "properties": ["weapon, fire, multiple", "beeps, machine, continuously"], "captions_pred_video": ["footage is from a car's point of view", null], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a beeping sound is being made "], "question": "which entity is not a weapon", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a motor runs in the distance as a soft wind periodically gusts"], "sample_ids": ["vBHyYJ8pL0", "xyL9F5VrjkE"], "start_seconds": ["2", "20"], "properties": ["noise, door, opening", "wind, motor, distance"], "captions_pred_video": [null, "of a caterpillar truck loading logs into a trailer"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "the wind is blowing and a car is passing by "], "question": "which entity is accompanied by a door opening and closing", "label": 0}, {"captions": ["a propeller rotates loudly and intensely", "a man speaks as a motor runs in the background"], "sample_ids": ["ugHJF0hfYkg", "xZepNM9qcRA"], "start_seconds": ["10", "30"], "properties": ["loud, intense, propeller", 
"background, motor, run"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man speaks while a motorcycle revs and accelerates "], "question": "which is quieter", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["weDbePuc-Xc", "wqZ135Ssz0"], "start_seconds": ["40", "60"], "properties": ["music, slaps, human", "two men, woman, birds"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", null], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "water pouring and bubbling"], "sample_ids": ["x9JovgqUcs", "uyRfq-jKPpo"], "start_seconds": ["500", "50"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro pu"], "captions_pred_audio": ["a man speaks and types on a keyboard", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a person snoring", "water splashes as an animal walks through"], "sample_ids": ["t8tv5YRMJUg", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["a person, snore, loud", "animal, water, splashes"], "captions_pred_video": ["of a man getting his face licked by another man", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a person sniffs and breathes heavily", "water splashes and gurgles as people speak"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wqN6IIHw3po", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["rain, surface, fall", "rooster, crow, background, men"], "captions_pred_video": ["in your own words what is happening in this screenshot? 
blood splattered all over the place", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and water is splashing", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "water is sprayed across a hard surface"], "sample_ids": ["uYT5gxnyMWM", "sQwlkXjQabo"], "start_seconds": ["50", "10"], "properties": ["female, spraying, scream", "water, spray, surface"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "spraying followed by silence"], "question": "which entity is sprayed across a hard surface", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tK4VlLsNxak", "wqZ135Ssz0"], "start_seconds": ["120", "60"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "two men, woman, birds"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "a duck quacks continuously"], "sample_ids": ["wvKpEYswXO0", "vh30P49Po6s"], "start_seconds": ["150", "30"], "properties": ["water, tap, run", "quacks, continuously, duck"], "captions_pred_video": ["of the person preparing food in the kitchen", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water 
running ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "people speak as gunfire rings out"], "sample_ids": ["zFjIWfSD-4", "wqTCwqVRDlk"], "start_seconds": ["410", "80"], "properties": ["People, motor, brakes", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["food is frying and sizzles", "a car accelerates and wind blows"], "sample_ids": ["zNRChLjqcU", "u0TrcHhkPQ"], "start_seconds": ["220", "20"], "properties": ["food is frying, sizzles, food", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running from a faucet into a sink", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sNB8zxXneIM", "vb1fPSDI4c"], "start_seconds": ["20", "30"], "properties": ["several, quack, cocks", "multiple, people, yell"], "captions_pred_video": ["a group of geese in a cage", null], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a drill runs and two people laugh", "water flows and trickles"], "sample_ids": ["tEE3MpBt1sg", "tB7hWb9gTuQ"], "start_seconds": ["50", "30"], "properties": ["two people, laugh, drill", "water, flow, trickle"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["people are laughing 
breathing and speaking with background noise ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["tDlysoZiA1I", "wDVMhEdTiVw"], "start_seconds": ["0", "30"], "properties": ["animal, grunt, chirp", "gun, shoot, water"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a toilet flushes and water drains", "a man speaks as a motor runs in the background"], "sample_ids": ["sfAvvZwdLCY", "xZepNM9qcRA"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "background, motor, run"], "captions_pred_video": ["footage of the toilet in the bathroom", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a toilet is flushed", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["someone snores nearby", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["spJCm8tD9Zo", "wqZ135Ssz0"], "start_seconds": ["90", "60"], "properties": ["someone snores, nearby, someone", "two men, woman, birds"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a man woman speak while crickets sing", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["zTLVJCo4WEE", "w34HjHr6gAY"], "start_seconds": ["30", 
"30"], "properties": ["a, crickets, sing", "beeps, hit, woman"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["heavy rain splashes as it falls", "a child speaks in closed space"], "sample_ids": ["wP8ZKrlx3oA", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["fall, rain, splash", "child, space, speak"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is not a splash", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["wz7N8YRy74I", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, men", "a woman, a television program, a bird"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a woman is speaking and 
a dog is whimpering"], "question": "which entity has a rooster?", "label": 0}, {"captions": ["a clock ticktocks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["v-g-j2uTByM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks", "stream, water, flow"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "footage is blurry and out of focus"], "captions_pred_audio": ["a clock is ticking loudly", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["frogs croak and vocalize", "a man speaks as a car is passing by"], "sample_ids": ["yswmmRZFItk", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["croak, vocalize, frog", "a, car, pass"], "captions_pred_video": ["a close up of a frog in the water", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a frog is croaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is not a frog?", "label": 1}, {"captions": ["a train horn blows as it passes by", "a man speaks as a car is passing by"], "sample_ids": ["zVacuqSb4LI", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["horn, blows, train", "a, car, pass"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video 
by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a man is speaking with background noise and breathing sounds "], "question": "which is a moving object", "label": 0}, {"captions": ["a vehicle engine accelerates and wind blows", "an infant crying frantically"], "sample_ids": ["wudZTNBtVqc", "zwOBqeFTgiU"], "start_seconds": ["60", "30"], "properties": ["accelerates, engine, wind", "cry, infant, frantically"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "of the baby crying in the car seat"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a baby cries loudly"], "question": "which entity is a human", "label": 1}, {"captions": ["goats bleat and people speak", "a frog croaks as other frogs croak in the background"], "sample_ids": ["z5iUE5h0EPs", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["goats bleat, people speak, language", "background, frog, croak"], "captions_pred_video": ["of the goat in the barn", "a close up of a frog in the water"], "captions_pred_audio": ["a goat bleats and a man 
speaks", "a frog is croaking"], "question": "which entity is a single animal", "label": 1}, {"captions": ["a helicopter engine runs continuously", "small dogs growl, bark and yip."], "sample_ids": ["ugHJF0hfYkg", "sShpyu2l4YQ"], "start_seconds": ["10", "0"], "properties": ["engine, running, continuously", "growl, bark, yip"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "the puppies are playing with a toy"], "captions_pred_audio": ["a helicopter is flying overhead ", "a dog is barking and growling"], "question": "which entity is more likely to be running continuously", "label": 0}, {"captions": ["a weapon fires multiple times", "a toilet flushes and a female speaks"], "sample_ids": ["sMC07Ucy7kg", "yaln9y8I7ms"], "start_seconds": ["10", "230"], "properties": ["weapon, fire, multiple", "female, flushes, toilet"], "captions_pred_video": ["footage is from a car's point of view", "footage is blurry and out of focus"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a toilet flushes and a man speaks"], "question": "which entity is not a weapon", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "an infant crying as a woman laughs"], "sample_ids": ["vzxHnu-SFEw", "xhmRY9yhC7c"], "start_seconds": ["80", "20"], "properties": ["two objects, woman, speak", "a, laugh, infant"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a baby cries and a woman speaks"], "question": "which woman is a mother", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vZAw4apG0Es", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["background, clock, ticktocks", "loud, multiple, distance"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a man speaks over a running engine and blowing wind"], "sample_ids": ["wtDqrBygTcU", "ylpYOorfH4o"], "start_seconds": ["30", "410"], "properties": ["man, engine, run", "engine, running, wind"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 
dodge ram 15"], "captions_pred_audio": ["a man is speaking and a motor is running", "a man is speaking and an engine is revving"], "question": "which entity is a boat?", "label": 0}, {"captions": ["an adult woman and an adult man speak", "a man speaks as a motor runs in the background"], "sample_ids": ["zTLVJCo4WEE", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["two people, adult, speak", "background, motor, run"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "birds chirp as a man speaks and a younger person speaks"], "sample_ids": ["smDKStoHBJo", "xl2PIWyXaM"], "start_seconds": ["0", "160"], "properties": ["a, talk, baby, cry", "chirp, man, younger person"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "birds are chirping and people are talking"], "question": "which entity has a baby?", "label": 0}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vddP56-ogds", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["water, splash, person, laugh", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a woman is speaking and a baby is crying"], "question": "which entity has a person speaking?", "label": 0}, {"captions": ["food is frying while a woman speaks", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["yhQ2Lg-7qDY", 
"uEU-Hg5MTN8"], "start_seconds": ["130", "27"], "properties": ["food, woman, speak", "animal, grunts, snorts"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a woman is speaking and a baby is crying"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a man speaks as bees buzz and birds chirp", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["t25U-v4k4ts", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["bees buzz, birds chirp, man speaks", "a woman, laughs, animal"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking and an animal snorting?", "label": 1}, {"captions": ["a small engine spits as it runs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sZvwOuuPGP0", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["spits, engine, runs", "female, spraying, scream"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a medium engine is running ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["electronic beeps occur in a short series", "a car speeding up in the distance"], "sample_ids": ["y682ml90jGw", "u0TrcHhkPQ"], "start_seconds": ["11", "20"], "properties": ["beeps, series, electronic", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a race car accelerates and revs its engine 
"], "question": "which is moving faster", "label": 0}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "a stream of water runs briefly"], "sample_ids": ["vmrxwuAMb2I", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["a dog, inhales, exhales", "stream, water, run"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a dog barks and growls", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a baby cries and a woman speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tMbMDvT50j8", "zl9Dqx-j7q4"], "start_seconds": ["12", "6"], "properties": ["a, cry, woman", "engine, laugh, loud"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a baby cries and a woman speaks", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "water splashes as an animal walks through"], "sample_ids": ["s4Uz1Ffgo04", "w1ir-sZ3Im8"], "start_seconds": ["100", "90"], "properties": ["roars, background, people speaking", "animal, water, splashes"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "water splashes and gurgles as people speak"], "question": "which entity is more quiet", "label": 1}, {"captions": ["people speak in a closed space", "a stream of water runs briefly"], "sample_ids": ["sTpirNYo8vQ", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["people, space, speak", "stream, water, run"], "captions_pred_video": ["of 
a man taking a selfie on a bus", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "a telephone rings followed by a woman talking"], "sample_ids": ["tGcFnX0GHI", "tGcFnX0GHI"], "start_seconds": ["0", "0"], "properties": ["ring, talk, woman", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a video of a woman talking?", "label": 0}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "wind blows as people chatter quietly"], "sample_ids": ["xyL9F5VrjkE", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["wind, motor, distance", "wind, chatter, people"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage is blurry and out of focus"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "a stream of water runs briefly"], "sample_ids": ["uqFtmnhuqA8", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["a, b, c", "stream, water, run"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "water splashing and a person laughs in 
the distance then a man speaks nearby"], "sample_ids": ["vKrYfzleLB8", "vddP56-ogds"], "start_seconds": ["110", "30"], "properties": ["a, ring, gunshots", "water, splash, person, laugh"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", null], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "water is running and gurgling and a man is speaking"], "question": "which entity is more calm", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a clock ticktocks"], "sample_ids": ["vdoxuJn9lTc", "v-g-j2uTByM"], "start_seconds": ["40", "30"], "properties": ["burp, loud, girl", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a child speaks followed by a burp", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "water splashes as an animal walks through"], "sample_ids": ["smDKStoHBJo", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["a, infant, speaking", "animal, water, splashes"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "water splashes and gurgles as people speak"], "question": "which entity is a video of an animal walking through water?", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "a horn rings out as a machine runs by"], "sample_ids": ["tZGN5a7ybxo", "slZLHwNbbt4"], "start_seconds": ["60", "300"], "properties": ["ring, train, horn", "a, horn, run"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "footage of a train coming down the tracks on a sunny 
day"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a door opens and birds chirp"], "sample_ids": ["vfYTJq7nU", "yeFvk9x0wWI"], "start_seconds": ["130", "30"], "properties": ["rustling, ducks, quack", "door, open, birds"], "captions_pred_video": [null, "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a duck quacks and a woman speaks", "birds chirp in the background as a car drives by "], "question": "which entity is more quiet", "label": 1}, {"captions": ["children speak and play together", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["yVVP8XvWJTo", "su6FAOcOA8c"], "start_seconds": ["260", "4"], "properties": ["children, speak, play", "engine, idle, woman"], "captions_pred_video": ["footage of a playground at a school or daycare center", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wvKpEYswXO0", "uEU-Hg5MTN8"], "start_seconds": ["150", "27"], "properties": ["water, tap, run", "a woman, laughs, animal"], "captions_pred_video": ["of the person preparing food in the kitchen", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a woman is speaking and a baby is crying"], "question": "which entity is about water?", "label": 0}, {"captions": 
["continuous sizzling with a woman speaking towards the end", "a toilet flushes and a female speaks"], "sample_ids": ["ukxt9I7eMMg", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["continuous, woman, speaking", "female, flushes, toilet"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a toilet flushes and a man speaks"], "question": "which entity has a woman speaking towards the end?", "label": 0}, {"captions": ["a beep occurs briefly", "a woman speaks as she rubs two objects together"], "sample_ids": ["xtWeJ56-U-g", "vzxHnu-SFEw"], "start_seconds": ["20", "80"], "properties": ["beep, occur, briefly", "two objects, woman, speak"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "small dogs yip and bark sharply"], "sample_ids": ["se87d6yxEOA", "v-wcQf4BDY0"], "start_seconds": ["10", "120"], "properties": ["run, whistle, pass", "bark, yip, sharply"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["xl2PIWyXaM", "uEU-Hg5MTN8"], "start_seconds": ["160", "27"], "properties": ["chirp, man, younger person", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["birds are chirping and people are talking", "a woman is speaking and a baby is crying"], "question": "which entity has a more snorts", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tIY7qOV3rEM", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "female, spraying, scream"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, 
{"captions": ["someone is typing on a computer keyboard", "a toilet flushes and water drains"], "sample_ids": ["v0x1odnXtP0", "sfAvvZwdLCY"], "start_seconds": ["210", "20"], "properties": ["keyboard, type, computer", "water drains, flushes, water"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a person is typing on a keyboard", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a dark barks and whimpers", "wind blows as people chatter quietly"], "sample_ids": ["sYj4hpDUZDQ", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["barks, whimpers, dark", "wind, chatter, people"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "footage is blurry and out of focus"], "captions_pred_audio": ["a dog barks and a cat meows", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vf9xf3vMsGM", "sLUnaPT5gM8"], "start_seconds": ["540", "0"], "properties": ["A man speaks while turning a water faucet on.", "loud, laughter, intermittent"], "captions_pred_video": ["of the person washing their hands under the faucet", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["birds chirp as a train approaches", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["xM4joTqDVp4", "vfYTJq7nU"], "start_seconds": ["160", "130"], "properties": ["bird, chirp, train", "rustling, ducks, quack"], 
"captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", null], "captions_pred_audio": ["birds are chirping and a train is moving ", "a duck quacks and a woman speaks"], "question": "which entity is about birds?", "label": 0}, {"captions": ["water flows as men speak and yell", "a stream of water runs briefly"], "sample_ids": ["vJ7JPEFhyLA", "x-PeY8Yb8M4"], "start_seconds": ["16", "300"], "properties": ["water, flow, men", "stream, water, run"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["uEU-Hg5MTN8", "sLUnaPT5gM8"], "start_seconds": ["27", "0"], "properties": ["animal, grunts, snorts", "loud, laughter, intermittent"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["water gurgles, metal squeaks and the water stops", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["x4a9YGIw4ok", "vfYTJq7nU"], "start_seconds": ["120", "130"], "properties": ["water, gurgles, stops", "rustling, ducks, quack"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and water splashes", "a duck quacks and a woman speaks"], "question": "which entity is 
about water?", "label": 0}, {"captions": ["a vehicle engine revs as the vehicle passes", "speaking following by laughing and clapping"], "sample_ids": ["yDoT73BWsdA", "u2f5NpsoHBg"], "start_seconds": ["10", "30"], "properties": ["engine, revs, vehicle", "person, laugh, clap"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "is being projected on a screen at the front of the stage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and a crowd is clapping"], "question": "which entity is a person", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a vehicle accelerates before a race car idles then accelerates quickly"], "sample_ids": ["sSMl2vc3ek", "sjlVMgdGSK0"], "start_seconds": ["20", "30"], "properties": ["a person, laughs, snores", "accelerates, vehicle, race car"], "captions_pred_video": [null, "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a person snoring loudly", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a person is burping while a girl speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vdoxuJn9lTc", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["person, burp, girl", "rustling, ducks, quack"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", null], "captions_pred_audio": ["a child speaks followed by a burp", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a woman speaks as she rubs two objects together"], "sample_ids": ["zFjIWfSD-4", "vzxHnu-SFEw"], "start_seconds": ["410", "80"], "properties": ["People, motor, brakes", "two objects, woman, speak"], 
"captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is about a woman speaking?", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "water splashes as an animal walks through"], "sample_ids": ["zALy31PjDl0", "w1ir-sZ3Im8"], "start_seconds": ["21", "90"], "properties": ["a man, a vehicle, a horn", "animal, water, splashes"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["bees buzz as wind blows", "a small engine idles continuously"], "sample_ids": ["tMJne1a4AFI", "y5WII6cTH7k"], "start_seconds": ["0", "40"], "properties": ["bees, buzz, wind", "engine, idle, continuously"], "captions_pred_video": ["a swarm of bees 
on the ground", "footage of a sewing machine stitching a red and white hat"], "captions_pred_audio": ["a swarm of bees buzzing around", "an engine is knocking and vibrating "], "question": "which entity is not a living thing", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "paper folding and crinkling"], "sample_ids": ["sjlVMgdGSK0", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["accelerates, vehicle, race car", "paper, fold, crinkle"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a car accelerates and revs its engine ", "the wind blows and a mouse clicks "], "question": "which is not a vehicle", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["uZesmtKZGSw", "vlS6YMeWAPo"], "start_seconds": ["250", "40"], "properties": ["men, talk, cars", "sheep, baa, birds"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 
1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "winds blows roughly as a vehicle races past"], "sample_ids": ["vlJS7LN2XyM", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["background, clocks, ticking", "wind, blows, vehicle"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a ticktock of a clock", "a jet engine roars and wind blows "], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "a infant makes noise and is excited"], "sample_ids": ["t97k0cejSQE", "wIJK3-5y0kA"], "start_seconds": ["250", "30"], "properties": ["bird, chirp, insect", "noise, excited, infant"], "captions_pred_video": ["a bee on a purple thistle flower", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a toilet flushes and water drains", "a man speaks in the background while a slow tick repeats"], "sample_ids": ["sfAvvZwdLCY", "vZAw4apG0Es"], 
"start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "background, tick, repeat"], "captions_pred_video": ["footage of the toilet in the bathroom", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a toilet is flushed", "a clock is ticking and people are talking"], "question": "which entity is a video of a toilet flushing?", "label": 0}, {"captions": ["pigeons vocalize and birds chirp", "a stream of water runs briefly"], "sample_ids": ["uiS58TNyUiw", "x-PeY8Yb8M4"], "start_seconds": ["430", "300"], "properties": ["vocalize, bird, chirp", "stream, water, run"], "captions_pred_video": ["of the pigeon in the cage", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a car is driving on a wet road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "an insect buzzes around continuously"], "sample_ids": ["ugHJF0hfYkg", "v25l1jef3JY"], "start_seconds": ["10", "0"], "properties": ["loud, intense, propeller", "buzzes, continuously, insect"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a helicopter is flying overhead ", "a fly is buzzing around a microphone "], "question": "which entity is quieter", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "a toilet flushes and a female speaks"], "sample_ids": ["soTOh3zYJfY", "yaln9y8I7ms"], "start_seconds": ["40", "230"], "properties": ["vehicle, skid, tires", "female, flushes, toilet"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "footage is blurry and out of focus"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": 
["a person is snoring while sleeping", "dishes cling together then a man begins to speak"], "sample_ids": ["vJrjSeP17yE", "sQGXqGcwOTc"], "start_seconds": ["40", "3"], "properties": ["a person is sleeping, snoring, person", "cling, speak, dishes"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a person snoring loudly", "mechanisms are operating and water is splashing "], "question": "which entity is about a person speaking?", "label": 1}, {"captions": ["paper is crumpling consistently", "a saw finishes running as metal clings in the background"], "sample_ids": ["v5cSxLaHADY", "zofjfKhqLk8"], "start_seconds": ["0", "10"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "background, metal, clings"], "captions_pred_video": ["footage of the person holding a pair of scissors", "footage of a man using a machine to cut a piece of wood"], "captions_pred_audio": ["paper is crumpled and crinkled", "a large engine is running and a bell is ringing"], "question": "which entity is a video of a saw running?", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "paper folding and crinkling"], "sample_ids": ["y8WEcpOlT3I", "zPpG3RD8lSs"], "start_seconds": ["40", "20"], "properties": ["wind, speak, buffeting", "paper, fold, crinkle"], "captions_pred_video": ["on how to use a sewing machine youtube", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how 
to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["food is frying and sizzles", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zNRChLjqcU", "tdWhHV3X25Q"], "start_seconds": ["220", "60"], "properties": ["food is frying, sizzles, food", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vbr9mHKc8WM", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["noise, loudness, engine", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["an engine is idling", "a woman is speaking and a baby is crying"], "question": "which entity is quieter", "label": 0}, {"captions": ["race cars go around a track as a man commentates", "winds blows roughly as a vehicle races past"], "sample_ids": ["uZesmtKZGSw", "xjvTpk2Zpr8"], "start_seconds": ["250", "70"], "properties": ["car, track, man", "wind, blows, vehicle"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "some tunes played by whistling"], "sample_ids": ["w5W5Kqtc8E", "u6BnG6YZqJ4"], "start_seconds": ["100", "0"], "properties": ["wind, engine, scream", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a motorcycle engine works nearby", "winds blows roughly as a vehicle races past"], "sample_ids": ["tOSWIURC-4", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["engine, work, nearby", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a lawn mower is running ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a man speaks while water drains", "an insect buzzes around continuously"], "sample_ids": ["vSeGhaZt-aI", "v25l1jef3JY"], "start_seconds": ["50", "0"], "properties": ["water, drain, man", "buzzes, continuously, insect"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man 
is speaking and pouring liquid with background noise ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tZGN5a7ybxo", "tiDFTC-5vU"], "start_seconds": ["60", "30"], "properties": ["ring, train, horn", "male, duck, laugh"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", null], "captions_pred_audio": ["a train is moving and blowing its horn ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a clock ticktocks continuously", "a man sprays as a scraping occurs in the background"], "sample_ids": ["vlJS7LN2XyM", "sOa7g-44Dag"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks continuously", "background, man, spray"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and rubbing his hands together "], "question": "which entity is a man?", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a man makes an exclamation, then another man speaks"], "sample_ids": ["w2M4i1mklOA", "xO-Q2BlIIPU"], "start_seconds": ["30", "30"], "properties": ["loud, chime, bell", "two men, exclamation, speak"], "captions_pred_video": ["footage of an antique clock", "a clock with a green glowing display showing the time 09 07 2016 12 31 2016"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking with background noise and breathing sounds "], "question": "which is quieter", "label": 1}, {"captions": ["a motor noise is accompanied by a door 
opening and closing", "water splashes and a door squeaks"], "sample_ids": ["vBHyYJ8pL0", "sdXV-ylviw"], "start_seconds": ["2", "190"], "properties": ["noise, door, opening", "sound, splash, door"], "captions_pred_video": [null, null], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a dog barks and taps with background noise "], "question": "which door is squeaking", "label": 1}, {"captions": ["a person is burping while a girl speaks", "vehicle tires screech and a man speaks before a car door opens"], "sample_ids": ["vdoxuJn9lTc", "sxYkFKFIZD0"], "start_seconds": ["40", "20"], "properties": ["person, burp, girl", "screech, man, door"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2"], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking while a car is revving and accelerating with a squeal in the background "], "question": "which entity is about a car door opening?", "label": 1}, {"captions": ["ticking continues without interruption", "a male speaks and another male speaks"], "sample_ids": ["v-g-j2uTByM", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["ticking, continuous, clock", "two males, speaking, male"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a clock is ticking loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is not continuous", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an 
explosion occurs", "water flows as men speak and yell"], "sample_ids": ["xKB8O8LTs6s", "vJ7JPEFhyLA"], "start_seconds": ["70", "16"], "properties": ["music, gunfire, explosion", "water, flow, men"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "paper is crumpling consistently"], "sample_ids": ["vbZ-0lGPneg", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["a woman, a television program, a bird", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "an insect buzzes around continuously"], "sample_ids": ["v7jJS8aAyA", "v25l1jef3JY"], "start_seconds": ["10", "0"], "properties": ["wind, blows, loudly", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a fly is buzzing around a microphone "], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a child speaks in closed space"], "sample_ids": ["yajyRTUQk3U", "yW6FWLSLkx4"], "start_seconds": ["400", "40"], "properties": ["noise, woman, speak", "child, space, speak"], "captions_pred_video": ["- a woman cooking in the kitchen", "of the doll sitting on the bed with her 
hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking over a noise", "label": 0}, {"captions": ["wind blows strongly", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["w8uLijTqtlU", "wDVMhEdTiVw"], "start_seconds": ["70", "30"], "properties": ["wind, blows, strongly", "gun, shoot, water"], "captions_pred_video": ["footage is blurry and shaky", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["the wind is blowing strongly", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not a gun?", "label": 0}, {"captions": ["a person speaks over rustling leaves", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zOZleIRqZm4", "wDVMhEdTiVw"], "start_seconds": ["80", "30"], "properties": ["rustling, leaves, person", "gun, shoot, water"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a person sniffles and sneezes", "water flows and trickles"], "sample_ids": ["uRlbY6aoBU", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["sneezes, sniffles, person", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is sneezing ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a toilet flushes and water drains", "a clock ticktocks"], "sample_ids": ["sfAvvZwdLCY", "v-g-j2uTByM"], "start_seconds": ["20", "30"], 
"properties": ["water drains, flushes, water", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the toilet in the bathroom", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a toilet is flushed", "a clock is ticking loudly"], "question": "which entity is a timepiece", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a child speaks in closed space"], "sample_ids": ["zALy31PjDl0", "yW6FWLSLkx4"], "start_seconds": ["21", "40"], "properties": ["a man, a vehicle, a horn", "child, space, speak"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["w2M4i1mklOA", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["alarm, gears, turn", "water, radio, man"], "captions_pred_video": ["footage of an antique clock", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a clock?", "label": 0}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "people cheer as a vehicle engine revs"], "sample_ids": ["vlJS7LN2XyM", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["background, clocks, ticking", "engine revs, vehicle, people"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "a school bus decorated with christmas 
lights is floating in the water"], "captions_pred_audio": ["a ticktock of a clock", "a truck is revving its engine and a man is speaking "], "question": "which entity is more active", "label": 1}, {"captions": ["ticking continues without interruption", "a duck quacks loudly and continuously"], "sample_ids": ["v-g-j2uTByM", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["ticking, continuous, clock", "loud, continuous, quacks"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a clock is ticking loudly", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["tDVADusiIoc", "wDVMhEdTiVw"], "start_seconds": ["60", "30"], "properties": ["water, radio, man", "gun, shoot, water"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is about shooting water?", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a clock ticktocks"], "sample_ids": ["wtDqrBygTcU", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["man, engine, run", "ticktocks, clock, ticktocks"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and a motor is running", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "paper is crumpling consistently"], 
"sample_ids": ["voJh2gJxXhA", "v5cSxLaHADY"], "start_seconds": ["50", "0"], "properties": ["music, frog, croak", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["music is playing and crickets are chirping ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["water gurgles, metal squeaks and the water stops", "plastic is tapped on while someone speaks"], "sample_ids": ["x4a9YGIw4ok", "wvKpEYswXO0"], "start_seconds": ["120", "150"], "properties": ["water, gurgles, stops", "plastic, tap, speak"], "captions_pred_video": ["footage is blurry and out of focus", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a toilet flushes and water splashes", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks?", "label": 1}, {"captions": ["a mechanical buzzing getting louder", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["sEprKHm8Sj8", "xfaoyyzw2WU"], "start_seconds": ["90", "180"], "properties": ["noise, loud, buzzing", "loud, jet engine, roar"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "an aircraft engine roars and a man speaks "], "question": "which noise is louder", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "an airplane engine runs"], "sample_ids": ["sOa7g-44Dag", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": 
["background, man, spray", "engine, airplane, runs"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a man speaks while water drains", "wind blows as people chatter quietly"], "sample_ids": ["vSeGhaZt-aI", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["water, drain, man", "wind, chatter, people"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "some tunes played by whistling"], "sample_ids": ["vW4x7S1VfQc", "u6BnG6YZqJ4"], "start_seconds": ["150", "0"], "properties": ["clacking, oil, woman", "tune, play, whistling"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["food sizzles in a frying pan", "a person whistling a song"], "question": "which entity is playing tunes", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xjhAnI2q6hM", "yajyRTUQk3U"], "start_seconds": ["6", "400"], "properties": ["engine revs, vehicle, people", "a woman, something, fried"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a truck is revving its engine and a man is 
speaking ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["y2bVZ7rz-5M", "vlS6YMeWAPo"], "start_seconds": ["280", "40"], "properties": ["motor noise, horn, siren", "sheep, baa, birds"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a goat bleats and birds chirp"], "question": "which entity is followed by a horn honking and a siren wailing", "label": 0}, {"captions": ["a person is snoring while sleeping", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vJrjSeP17yE", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["a person is sleeping, snoring, person", "animal, grunts, snorts"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 0}, {"captions": ["an airplane engine runs", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yVPZ2MNWpms", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["engine, airplane, runs", "People, motor, brakes"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", null], "captions_pred_audio": ["a car is driving by on the road ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "paper is crumpling consistently"], "sample_ids": 
["vdoxuJn9lTc", "v5cSxLaHADY"], "start_seconds": ["40", "0"], "properties": ["burp, loud, girl", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a child speaks followed by a burp", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["children speak and play together", "someone snores nearby"], "sample_ids": ["yVVP8XvWJTo", "spJCm8tD9Zo"], "start_seconds": ["260", "90"], "properties": ["children, speak, play", "someone snores, nearby, someone"], "captions_pred_video": ["footage of a playground at a school or daycare center", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a car accelerates and wind blows"], "sample_ids": ["yLy-WycbVVE", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["background, people, talk", "accelerates, wind, blows"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", null], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "two women and a man talk while a kid cries"], "sample_ids": ["yFB25fqfU8I", "wyllXV6PjKo"], "start_seconds": ["300", "30"], "properties": ["wave, crash, shoreline", "a kid, talk, cry"], "captions_pred_video": ["footage of a person surfing in the ocean", null], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a woman 
speaks and a baby cries"], "question": "which entity is more calm", "label": 1}, {"captions": ["a person screams glaringly", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["xC8kbrKJmco", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["glaringly, screams, person", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a goat is bleating ", "a man is speaking and ducks are quacking"], "question": "which entity is a person?", "label": 0}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a vehicle accelerates and squeals tires"], "sample_ids": ["uRExseg-0XI", "yRx9txMcBl0"], "start_seconds": ["210", "40"], "properties": ["woman, man, water", "accelerates, tires, squeals"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a car is revving its engine and skidding "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["an adult woman and an adult man speak", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zTLVJCo4WEE", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["two people, adult, speak", "music, gunfire, explosion"], "captions_pred_video": ["- a boy with a 
rifle aiming at a target", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman speaks and crickets chirp", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["people speak and tapping occurs", "water running down a sink while a man is talking"], "sample_ids": ["tFCUUGdREgA", "vSeGhaZt-aI"], "start_seconds": ["70", "50"], "properties": ["people, tap, speak", "water, sink, talk"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a video of water running down a sink while a man is talking?", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["xKB8O8LTs6s", "wDVMhEdTiVw"], "start_seconds": ["70", "30"], "properties": ["music, gunshots, explosion", "gun, shoot, water"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity has a gunshot followed by an explosion?", "label": 0}, {"captions": ["multiple people speak and children yell while water gurgles", "a infant makes noise and is excited"], "sample_ids": ["vb1fPSDI4c", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["multiple, people, yell", "noise, excited, infant"], "captions_pred_video": [null, "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a baby cries and a woman speaks"], "question": 
"which entity is more quiet", "label": 1}, {"captions": ["a man speaks over intermittent keyboard taps", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tw76HGONaKg", "vb1fPSDI4c"], "start_seconds": ["570", "30"], "properties": ["audio, man, keyboard", "multiple, people, yell"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "people cheer as a vehicle engine revs"], "sample_ids": ["rwTERCUno", "xjhAnI2q6hM"], "start_seconds": ["90", "6"], "properties": ["engine, idle, sputter", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["an engine is idling and vibrating", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "water flows as men speak and 
yell"], "sample_ids": ["s4Uz1Ffgo04", "vJ7JPEFhyLA"], "start_seconds": ["100", "16"], "properties": ["water, rushes, motorcycle", "water, flow, men"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["multiple insects buzz over rustling wind", "water splashes as an animal walks through"], "sample_ids": ["tMJne1a4AFI", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["wind, buzz, rustling", "animal, water, splashes"], "captions_pred_video": ["a swarm of bees on the ground", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a swarm of bees buzzing around", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "water flows as men speak and yell"], "sample_ids": ["w34HjHr6gAY", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["beeps, hit, woman", "water, flow, men"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking while the wind is blowing and 
water is splashing"], "question": "which entity has more water flowing", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "a whistling owl calls out repeatedly and insects screech"], "sample_ids": ["u--KhUW8l1Y", "w6RTHR6AeAg"], "start_seconds": ["0", "40"], "properties": ["engine, sound, horn", "call, owl, screech"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", null], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "an owl hoots and mechanisms operate "], "question": "which entity is a bird?", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["uRExseg-0XI", "xfaoyyzw2WU"], "start_seconds": ["210", "180"], "properties": ["woman, man, water", "loud, jet engine, roar"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vVhthZ45k3Y", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["cat, purr, hiss", "harsh, wind, blows"], "captions_pred_video": ["footage is blurry and out of focus", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a man is speaking with wind noise in the background "], "question": "which entity is more likely to be a natural phenomenon", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wudZTNBtVqc", 
"tDVADusiIoc"], "start_seconds": ["60", "60"], "properties": ["accelerates, engine, wind", "water, radio, man"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "wind blowing and birds chirping with the distant cooing of a large bird"], "sample_ids": ["w5W5Kqtc8E", "wRBHTgrbiwg"], "start_seconds": ["100", "50"], "properties": ["water, splashes, motorboat", "birds, chirp, cooing"], "captions_pred_video": [null, "of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "birds are chirping and insects are buzzing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "a man speaks followed by another man speaking outside"], "sample_ids": ["wvKpEYswXO0", "viuTg1M-dqg"], "start_seconds": ["150", "30"], "properties": ["plastic, tap, speak", "two men, speak, follow"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["someone snores nearby", "a woman speaks as she rubs two objects together"], "sample_ids": ["spJCm8tD9Zo", "vzxHnu-SFEw"], "start_seconds": ["90", "80"], "properties": ["someone snores, nearby, someone", "two objects, woman, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "how to make a paper cup with 
scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["soTOh3zYJfY", "vbZ-0lGPneg"], "start_seconds": ["40", "30"], "properties": ["vehicle, skid, tires", "a woman, a television program, a bird"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["food fries in a pan as someone talks and cooks", "a man speaks as a car is passing by"], "sample_ids": ["ukxt9I7eMMg", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["food, pan, cook", "a, car, pass"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "for 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "someone whistles a tune"], "sample_ids": ["w34HjHr6gAY", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["beeps, hit, woman", "someone, tune, whistle"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "an engine runs loudly"], "sample_ids": ["vbpKkWvfOu4", "vqZuVbG6-HI"], "start_seconds": ["560", "130"], "properties": ["a, man, speaks", "loud, engine, run"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "someone is typing on a computer keyboard"], "sample_ids": ["su6FAOcOA8c", "v0x1odnXtP0"], "start_seconds": ["4", "210"], "properties": ["engine, idle, woman", "keyboard, type, computer"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a person is typing on a keyboard"], "question": "which is not a person", "label": 0}, {"captions": ["a speedboat passes quickly on the water", "dishes cling together then a man begins to speak"], "sample_ids": ["tjmoSi330GM", "sQGXqGcwOTc"], "start_seconds": ["23", "3"], "properties": ["speed, water, boat", "cling, speak, dishes"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "mechanisms are operating and water is splashing "], "question": "which entity is a still image?", "label": 0}, {"captions": ["a clock ticktocks continuously", "a woman speaks as she rubs two objects together"], "sample_ids": ["vlJS7LN2XyM", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["ticktocks, clock, ticktocks continuously", "two objects, woman, speak"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a ticktock of a clock", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a clock", "label": 0}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "some light rustling followed by a loud burp and a girl speaking"], "sample_ids": ["s59PfAghdkM", "vdoxuJn9lTc"], "start_seconds": ["0", "40"], "properties": ["bird, chirp, background, horse, neigh", "burp, loud, girl"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a child speaks followed by a burp"], "question": "which entity has a horse in it?", "label": 0}, {"captions": ["birds chirp and pigeons vocalize while walking around", "running water in a faucet with some clinks"], "sample_ids": ["wIvYjuR3nrg", "zNRChLjqcU"], "start_seconds": ["9", "220"], "properties": ["birds, pigeons, vocalize", "water, faucet, run"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", null], "captions_pred_audio": ["birds are 
chirping and cooing", "water is running from a faucet into a sink"], "question": "which entity is silent", "label": 1}, {"captions": ["a person screams glaringly", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["xC8kbrKJmco", "xfaoyyzw2WU"], "start_seconds": ["0", "180"], "properties": ["glaringly, screams, person", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a goat is bleating ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "a vehicles accelerate quickly and someone laughs"], "sample_ids": ["wP8ZKrlx3oA", "uWPRNLnpy7Y"], "start_seconds": ["40", "10"], "properties": ["rain, storm, thunder", "accelerate, laugh, vehicle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "is taken from a car driving down the street"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a car accelerates and revs its engine "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "water rushes by"], "sample_ids": ["s4Uz1Ffgo04", "x-PeY8Yb8M4"], "start_seconds": ["100", "300"], "properties": ["water, rushes, vehicle", "water, rushes, by"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a car is driving on a wet road "], "question": "which entity is a video of water rushing by?", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["wqADXCzngMw", "uEU-Hg5MTN8"], 
"start_seconds": ["340", "27"], "properties": ["engine, idle, man", "animal, grunts, snorts"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "an airplane engine runs"], "sample_ids": ["yaln9y8I7ms", "yVPZ2MNWpms"], "start_seconds": ["230", "0"], "properties": ["female, flushes, toilet", "engine, airplane, runs"], "captions_pred_video": ["footage is blurry and out of focus", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a car is driving by on the road "], "question": "which entity is a machine", "label": 1}, {"captions": ["leaves rustle while man speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zOZleIRqZm4", "vb1fPSDI4c"], "start_seconds": ["80", "30"], "properties": ["leaves, rustle, speak", "multiple, people, yell"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a crowd of people are talking and laughing"], "question": "which entity is more active", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a clock ticktocks in wind"], "sample_ids": ["yLy-WycbVVE", "yVumC9TGknc"], "start_seconds": ["30", "30"], "properties": ["background, people, talk", "ticktocks, clock, wind"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "game title screen of the game shadow of the colossus on sony playstation 2"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a series of beeps and 
chirps"], "question": "which entity is silent", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "an airplane engine runs"], "sample_ids": ["zofjfKhqLk8", "yVPZ2MNWpms"], "start_seconds": ["10", "0"], "properties": ["noise, stop, motor", "engine, airplane, runs"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a car is driving by on the road "], "question": "which entity is a machine", "label": 1}, {"captions": ["running water in a faucet with some clinks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zNRChLjqcU", "yajyRTUQk3U"], "start_seconds": ["220", "400"], "properties": ["water, faucet, run", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["water is running from a faucet into a sink", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tjmoSi330GM", "vfYTJq7nU"], "start_seconds": ["23", "130"], "properties": ["speed, water, boat", "rustling, ducks, quack"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", null], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a duck quacks and a woman speaks"], "question": "which entity is moving at a slower speed", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "a telephone rings followed by a woman talking"], "sample_ids": ["wRV8yMk886E", "tGcFnX0GHI"], "start_seconds": ["0", "0"], "properties": ["liquid, spray, nozzle", "ring, talk, woman"], 
"captions_pred_video": ["two cars are parked in a parking lot at night", null], "captions_pred_audio": ["a man speaks followed by a loud burst", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["some men converse over an engine running", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sCiy7QS1U", "uZesmtKZGSw"], "start_seconds": ["300", "250"], "properties": ["men, converse, engine", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity shows men talking about cars?", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vlJS7LN2XyM", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["background, clocks, ticking", "a, scream, girl"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a ticktock of a clock", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": 
["a door slams shut roughly", "a woman speaks as she rubs two objects together"], "sample_ids": ["zkKdxzNC97Y", "vzxHnu-SFEw"], "start_seconds": ["27", "80"], "properties": ["a door, slams, shut", "two objects, woman, speak"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a door is opened and closed", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["tDlysoZiA1I", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["animal, grunt, multiple", "music, gunfire, explosion"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", 
"label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["uYT5gxnyMWM", "tiDFTC-5vU"], "start_seconds": ["50", "30"], "properties": ["female, spraying, scream", "male, duck, laugh"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a man speaks while water drains", "paper is crumpling consistently"], "sample_ids": ["vSeGhaZt-aI", "v5cSxLaHADY"], "start_seconds": ["50", "0"], "properties": ["water, drain, man", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man talks as several small engines run", "a clock ticktocks"], "sample_ids": ["u9A6VZQCZpU", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "people applaud and hoot and chat quietly"], "sample_ids": ["w0xsN8X18Y", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["rain, thunder, surface", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "people 
are clapping and speaking with background noise "], "question": "which entity is more likely to be a performance", "label": 1}, {"captions": ["white noise and birds chirping", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["wRBHTgrbiwg", "tDlysoZiA1I"], "start_seconds": ["50", "0"], "properties": ["noise, white, chirping", "animal, grunts, chirps"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "birds are chirping and a rooster is crowing "], "question": "which entity is more like a song", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sK4u5T8hW78", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["a, car, pass", "harsh, wind, blows"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with wind noise in the background "], "question": "which entity is more likely to be in a car", "label": 0}, {"captions": ["two men speak as a buffeting wind blows", "a toilet flushes and a female speaks"], "sample_ids": ["y8WEcpOlT3I", "yaln9y8I7ms"], "start_seconds": ["40", "230"], "properties": ["wind, speak, buffeting", "female, flushes, toilet"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage is blurry and out of focus"], 
"captions_pred_audio": ["a man is speaking with wind noise in the background ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "a man speaks while a machine runs before a smoke alarm beeps"], "sample_ids": ["tDVADusiIoc", "sG7TyPnFDR0"], "start_seconds": ["60", "180"], "properties": ["man, radio, blows", "beeps, machine, smoke alarm"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "a person is using an espresso machine in a restaurant"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and a microwave oven is beeping "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["someone is typing on a computer keyboard", "water flows and trickles"], "sample_ids": ["v0x1odnXtP0", "tB7hWb9gTuQ"], "start_seconds": ["210", "30"], "properties": ["keyboard, type, computer", "water, flow, trickle"], "captions_pred_video": ["how to make money on youtube in spanish", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a person is typing on a keyboard", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "wind blows as people chatter quietly"], "sample_ids": ["ukxt9I7eMMg", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["food, pan, cook", "wind, chatter, people"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "a vehicle engine runs and 
wind blows before women yell"], "sample_ids": ["vZAw4apG0Es", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["people, clock, converse", "wind, blow, vehicle"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "waves crash against a shoreline and people speak"], "sample_ids": ["y8WEcpOlT3I", "yFB25fqfU8I"], "start_seconds": ["40", "300"], "properties": ["harsh, wind, blows", "wave, crash, shoreline"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more calm", "label": 1}, {"captions": ["a woman speaks with water running", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wTideSjRFS0", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["water, running, woman", "engine, accelerate, idle"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wvKpEYswXO0", "sSMl2vc3ek"], "start_seconds": ["150", "20"], "properties": ["water, tap, run", "loud, multiple, distance"], "captions_pred_video": ["of the person preparing food in the 
kitchen", null], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["w5W5Kqtc8E", "yDoT73BWsdA"], "start_seconds": ["100", "10"], "properties": ["wind, blow, vehicle", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a race car accelerates and revs its engine "], "question": "which vehicle is moving", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "someone whistles a tune"], "sample_ids": ["yJ0TePmaOo", "sIXTftIuUgw"], "start_seconds": ["390", "90"], "properties": ["two hard objects, man, speak", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a person whistling a song"], "question": "which is a musical instrument", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "crowd applause while a guy laughs followed by another man speaking"], "sample_ids": ["uzQnlJXBbOM", "tDlfY3nmx1A"], "start_seconds": ["50", "160"], "properties": ["ringing, beep, stop", "applause, laugh, man"], "captions_pred_video": ["footage of a person using a cell phone on a table", "a man in a suit and tie is talking to another man in a suit and tie"], "captions_pred_audio": ["a telephone rings and a man speaks", "a crowd is clapping and laughing and a man is speaking "], "question": "which entity is a performance", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "several insects fly while two men talk"], "sample_ids": ["vb1fPSDI4c", "s-T9OVOiMLo"], 
"start_seconds": ["30", "330"], "properties": ["multiple, people, yell", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more people", "label": 0}, {"captions": ["music plays followed by gunshots and then an explosion", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["xKB8O8LTs6s", "uYT5gxnyMWM"], "start_seconds": ["70", "50"], "properties": ["music, gunshots, explosion", "a, scream, girl"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "plastic is tapped on while someone speaks"], "sample_ids": ["sAam2NqGhLY", "wvKpEYswXO0"], "start_seconds": ["20", "150"], "properties": ["snoring, breathing, child", "plastic, tap, speak"], "captions_pred_video": ["of a little girl sleeping on a couch", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a person is snoring", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is not a person", "label": 1}, {"captions": ["women speak and laugh as wind blows", "multiple people speak and children yell while water gurgles"], "sample_ids": ["un9VQlzgZM", "vb1fPSDI4c"], "start_seconds": ["5", "30"], "properties": ["wind, speak, laugh", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a crowd of people are talking and laughing"], 
"question": "which entity has more people speaking", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["xZepNM9qcRA", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["background, motor, run", "loud, laughter, intermittent"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a motorcycle engine is idling", "a infant makes noise and is excited"], "sample_ids": ["vZAqdHZ81yA", "wIJK3-5y0kA"], "start_seconds": ["180", "30"], "properties": ["engine, motorcycle, idling", "noise, excited, infant"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["an engine is idling loudly", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["xSKJGCItUWE", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["engine, run, boy", "a, scream, girl"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], 
"sample_ids": ["yJ0TePmaOo", "vfYTJq7nU"], "start_seconds": ["390", "130"], "properties": ["two hard objects, man, speak", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a duck quacks and a woman speaks"], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a jet engine roars, almost making a man inaudible", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["xfaoyyzw2WU", "vlS6YMeWAPo"], "start_seconds": ["180", "40"], "properties": ["loud, jet engine, roar", "sheep, baa, birds"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a goat bleats and birds chirp"], "question": "which entity is quieter", "label": 1}, {"captions": ["rustling with distant murmuring", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wnNNcxAPwGQ", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["sound, distance, rustling", "a woman, something, fried"], "captions_pred_video": ["footage of a yellow truck doing a burnout on a race track", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a crowd of people are talking and laughing while a skateboard rolls by ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "water flows and trickles"], "sample_ids": ["wvKpEYswXO0", "tB7hWb9gTuQ"], "start_seconds": ["150", "30"], "properties": ["plastic, tap, speak", "water, flow, trickle"], "captions_pred_video": ["of the person preparing food in the kitchen", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking and tapping with 
background noise and water running ", "water is splashing and gurgling"], "question": "which entity is flowing", "label": 1}, {"captions": ["goats bleat and metal clings", "a man speaks as a car is passing by"], "sample_ids": ["tH17JPjDPnc", "sK4u5T8hW78"], "start_seconds": ["260", "30"], "properties": ["bleat, metal, clings", "a, car, pass"], "captions_pred_video": ["feed of the goats eating hay in the barn", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "people cheer as a vehicle engine revs"], "sample_ids": ["xjvTpk2Zpr8", "xjhAnI2q6hM"], "start_seconds": ["70", "6"], "properties": ["engine, run, wind", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a truck is revving its engine and a man is speaking "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["birds chirp and a dog breathes heavily", "an infant crying frantically"], "sample_ids": ["y2ZBGpgbhHM", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["dog, chirp, breathe", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["birds chirping and a dog panting", "a baby cries loudly"], 
"question": "which entity is crying", "label": 1}, {"captions": ["a toilet flushes and water drains", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sfAvvZwdLCY", "sSMl2vc3ek"], "start_seconds": ["20", "20"], "properties": ["water drains, flushes, water", "loud, multiple, distance"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["zuua6-5goWw", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["birds, chirp, quiet, man, speaks", "sheep, baa, birds"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a goat bleats and birds chirp"], "question": "which entity is more active", "label": 1}, {"captions": ["a helicopter engine runs", "a man speaks followed by another man speaking outside"], "sample_ids": ["t5ZbXbniOWk", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["engine, helicopter, run", "two men, speak, follow"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on 
nintendo gamecube", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["tw76HGONaKg", "wz7N8YRy74I"], "start_seconds": ["570", "30"], "properties": ["A, game, keyboard", "rooster, crow, background, men"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["vehicle engines race around a track as a man commentates", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sZPuqDgX2V0", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["commentator, race, track", "music, gunfire, explosion"], 
"captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["an infant crying frantically", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zwOBqeFTgiU", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["cry, infant, frantically", "a woman, something, fried"], "captions_pred_video": ["of the baby crying in the car seat", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a baby cries loudly", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "people applaud and hoot and chat quietly"], "sample_ids": ["yZrFNS7GFBQ", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["pigeon, buzzes, insect", "people, applaud, hoot"], "captions_pred_video": ["of the bird in the cage", null], "captions_pred_audio": ["an owl hoots in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be a performance", "label": 1}, {"captions": ["material crumbles into a microphone", "paper is crumpling consistently"], "sample_ids": ["vofpvUo6NAw", "v5cSxLaHADY"], "start_seconds": ["220", "0"], "properties": ["material, crumbles, microphone", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["paper is being crumpled and crinkled", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a grown man speaks and water bubbles and runs"], "sample_ids": 
["yaln9y8I7ms", "vSeGhaZt-aI"], "start_seconds": ["230", "50"], "properties": ["female, flushes, toilet", "water, bubbles, run"], "captions_pred_video": ["footage is blurry and out of focus", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a man is speaking and pouring liquid with background noise "], "question": "which entity has a female speaking?", "label": 0}, {"captions": ["two women and a man talk while a kid cries", "a diesel truck engine runs continuously"], "sample_ids": ["wyllXV6PjKo", "sZvwOuuPGP0"], "start_seconds": ["30", "50"], "properties": ["a kid, talk, cry", "engine, diesel, truck"], "captions_pred_video": [null, "of a bulldozer clearing a road in a forest stock footage and royalty-free videos"], "captions_pred_audio": ["a woman speaks and a baby cries", "a medium engine is running "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["xERFUeZONz8", "s7knHCFW82w"], "start_seconds": ["0", "30"], "properties": ["ring, approach, traffic", "blow horn, get close, train"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["an emergency vehicle siren blares", "a train is blowing its horn and its wheels are squealing "], "question": "which entity is a train?", "label": 1}, {"captions": ["a door opens and birds chirp", "water splashes and a door squeaks"], "sample_ids": ["yeFvk9x0wWI", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["door, open, birds", "sound, splash, door"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", null], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a dog barks and taps 
with background noise "], "question": "which door is squeaking", "label": 1}, {"captions": ["a clock ticktocks continuously", "water splashes as an animal walks through"], "sample_ids": ["vlJS7LN2XyM", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["ticktocks, clock, ticktocks continuously", "animal, water, splashes"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a ticktock of a clock", "water splashes and gurgles as people speak"], "question": "which entity is a moving object", "label": 1}, {"captions": ["some clanking with distant murmuring", "small dogs yip and bark sharply"], "sample_ids": ["uMTTDZ2mb4", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["clanking, murmuring, distant", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a stream of water runs briefly"], "sample_ids": ["u7C-AEBQM", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["ticks, rhythmic, quiet", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a ticktock of a clock", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "plastic is tapped on while someone speaks"], "sample_ids": ["xZepNM9qcRA", "wvKpEYswXO0"], "start_seconds": ["30", "150"], "properties": ["background, motor, run", "plastic, tap, speak"], "captions_pred_video": ["a 
close-up view of the motorcycle's engine and exhaust system", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity has a motor running in the background", "label": 0}, {"captions": ["a man speaks uses a drill", "a male speaks and another male speaks"], "sample_ids": ["x5eIC7S0fbg", "viuTg1M-dqg"], "start_seconds": ["60", "30"], "properties": ["A man is speaking, uses a drill, and is a tool", "two males, speaking, male"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and using a power tool ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more males speaking", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xBxDz0CFVn0", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["stream, water, flow", "a woman, something, fried"], "captions_pred_video": ["footage is blurry and out of focus", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "some tunes played by whistling"], "sample_ids": ["zY3icUyMdh8", "u6BnG6YZqJ4"], "start_seconds": ["20", "0"], "properties": ["dog, bark, engine", "tune, play, whistling"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a car is driving and 
dogs are barking and squealing ", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "some men converse over an engine running"], "sample_ids": ["tDVADusiIoc", "sCiy7QS1U"], "start_seconds": ["60", "300"], "properties": ["wind, radio, waves", "men, converse, engine"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation?", "label": 1}, {"captions": ["a door opens and birds chirp", "a door opens and closes"], "sample_ids": ["yeFvk9x0wWI", "vBHyYJ8pL0"], "start_seconds": ["30", "2"], "properties": ["door, open, birds", "open, close, door"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", null], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which door opens and closes", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "water pouring and bubbling"], "sample_ids": ["uYT5gxnyMWM", "uyRfq-jKPpo"], "start_seconds": ["50", "50"], "properties": ["female, spraying, scream", "water, bubbles, pouring"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wqADXCzngMw", "vfYTJq7nU"], "start_seconds": ["340", "130"], "properties": ["engine, idle, man", "rustling, ducks, quack"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", null], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a duck quacks and a woman speaks"], "question": "which entity is about a man talking?", "label": 0}, {"captions": ["an electronic device bleeps once", "some men converse over an engine running"], "sample_ids": ["tHJ6JSa8Y4", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["bleeps, electronic, device", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a clock is ticking and beeping", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "people applaud and hoot and chat quietly"], "sample_ids": ["uZesmtKZGSw", "wwyfGO2J4"], "start_seconds": ["250", "90"], "properties": ["car, track, man", "people, applaud, hoot"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 
1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", null], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be at a concert", "label": 1}, {"captions": ["a goat screams and people speak in the background", "some clanking with distant murmuring"], "sample_ids": ["xC8kbrKJmco", "uMTTDZ2mb4"], "start_seconds": ["0", "30"], "properties": ["background, goat, scream", "clanking, murmuring, distant"], "captions_pred_video": [null, null], "captions_pred_audio": ["a goat is bleating ", "people are talking and a car is driving by with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a motorcycle engine is idling", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vZAqdHZ81yA", "vYkA3cfXp5Q"], "start_seconds": ["180", "30"], "properties": ["engine, motorcycle, idling", "engine, accelerate, idle"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["an engine is idling loudly", "an engine is idling"], "question": "which entity has an engine that is idling", "label": 0}, {"captions": ["a man speaks as crickets sing", "paper is crumpling consistently"], "sample_ids": ["ryFDPxgDOGc", "v5cSxLaHADY"], "start_seconds": ["570", "0"], "properties": ["a, crickets, sing", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "footage of the person holding a pair of 
scissors"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "vehicles pass by on a roadway"], "sample_ids": ["y2ZBGpgbhHM", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["animal, growl, bird", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds chirping and a dog panting", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["weDbePuc-Xc", "vlS6YMeWAPo"], "start_seconds": ["40", "40"], "properties": ["cartoon character, music, vocalize", "sheep, baa, birds"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a goat bleats and birds chirp"], "question": "which entity is more likely to be a video of a sheep baaing?", "label": 1}, {"captions": ["heavy rain splashes as it falls", "an infant crying as a woman laughs"], "sample_ids": ["wP8ZKrlx3oA", "xhmRY9yhC7c"], "start_seconds": ["40", "20"], "properties": ["fall, rain, splash", "a, laugh, infant"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "water splashes as an animal walks through"], 
"sample_ids": ["x6ijhqRY38s", "w1ir-sZ3Im8"], "start_seconds": ["250", "90"], "properties": ["something metal, glass, hit", "animal, water, splashes"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "water splashes and gurgles as people speak"], "question": "which entity is more likely to cause water to splash", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a fly buzzes around loudly as birds chirp"], "sample_ids": ["tK4VlLsNxak", "uJV8NDaHqqk"], "start_seconds": ["120", "100"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "loud, fly, chirp"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "a bee hive in a wooden box"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a swarm of bees buzzing around"], "question": "which entity is louder", "label": 1}, {"captions": ["a toilet flushes and water drains", "people cheer as a vehicle engine revs"], "sample_ids": ["sfAvvZwdLCY", "xjhAnI2q6hM"], "start_seconds": ["20", "6"], "properties": ["water drains, flushes, water", "engine revs, vehicle, people"], "captions_pred_video": ["footage of the toilet in the bathroom", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a toilet is flushed", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "some men converse over an engine running"], "sample_ids": ["w2JXXIAdUdg", "sCiy7QS1U"], "start_seconds": ["10", "300"], "properties": ["snoring, distance, person", "men, converse, engine"], "captions_pred_video": ["a close up shot of a person's mouth with a 
toothbrush in it", null], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a person snoring?", "label": 0}, {"captions": ["a person whistles and clicks a mouse", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["zCrAfDfv6-A", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["person, mouse, click", "beeps, hit, woman"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a person whistles a song", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a person whistles a meandering tune", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["uFoga8sHpiw", "tiDFTC-5vU"], "start_seconds": ["90", "30"], "properties": ["person, tune, whistle", "male, duck, laugh"], "captions_pred_video": ["footage of a bird in a cage", null], "captions_pred_audio": ["a person whistles a song", "a man is speaking and ducks are quacking"], "question": "which entity is a person", "label": 0}, {"captions": ["a train horn sounds as it passes by", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["ukg5L09Wpvo", "zj2R0XoFr5k"], "start_seconds": ["150", "50"], "properties": ["sound, train, horn", "airplane, boy, fly"], 
"captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a toilet flushes and water drains"], "sample_ids": ["rqfQRErjfk8", "sfAvvZwdLCY"], "start_seconds": ["170", "20"], "properties": ["crowd, cheers, applauds", "water drains, flushes, water"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["vmrxwuAMb2I", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["a dog, inhales, exhales", "a train, a horn, a bell"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a dog barks and growls", "a train blows its whistle and blows its horn "], "question": "which entity is a train", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "a toilet flushes and a female speaks"], "sample_ids": ["yks4cLgIDMc", "yaln9y8I7ms"], "start_seconds": ["170", "230"], "properties": ["background, speaking, child", "female, flushes, toilet"], "captions_pred_video": ["footage of two kids wrestling on the floor", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a child is crying", "a toilet flushes and a man speaks"], "question": "which entity has a child shouting in the 
background", "label": 0}, {"captions": ["two men speak as a buffeting wind blows", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["y8WEcpOlT3I", "wz7N8YRy74I"], "start_seconds": ["40", "30"], "properties": ["wind, speak, buffeting", "rooster, crow, background, men"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "an airplane accelerates briefly"], "sample_ids": ["zkKdxzNC97Y", "zjTG0gaGCUI"], "start_seconds": ["27", "80"], "properties": ["hard, surface, door", "accelerates, airplane, briefly"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a jet engine roars as wind blows "], "question": "which object is moving", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["su6FAOcOA8c", "zFjIWfSD-4"], "start_seconds": ["4", "410"], "properties": ["engine, idle, woman", "People, motor, brakes"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", null], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["waves crash against a shoreline and wind blows", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zdYdyF9-m8U", "uYT5gxnyMWM"], "start_seconds": ["7", "50"], "properties": ["wind, crash, shoreline", "female, spraying, scream"], 
"captions_pred_video": ["a person kayaking in the ocean near a cliff", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["waves crash and wind blows ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["weDbePuc-Xc", "w34HjHr6gAY"], "start_seconds": ["40", "30"], "properties": ["cartoon character, music, vocalize", "beeps, hit, woman"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["vehicles pass by on a roadway", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["tgbONvsP47Y", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["pass, vehicle, roadway", "a woman, laughs, animal"], "captions_pred_video": ["footage of a fire truck entering a garage", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a car is driving on the road ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking 
in the background", "an airplane accelerates briefly"], "sample_ids": ["s4Uz1Ffgo04", "zjTG0gaGCUI"], "start_seconds": ["100", "80"], "properties": ["roars, background, people speaking", "accelerates, airplane, briefly"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a jet engine roars as wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "rain falls onto a hard surface and thunder roars before music plays"], "sample_ids": ["sZPuqDgX2V0", "xNMovAf3o50"], "start_seconds": ["30", "0"], "properties": ["engine, accelerate, intercom", "rain, thunder, music"], "captions_pred_video": [null, "tieng mua - the falling rain lynk lee"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "thunder and rain with music playing in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a person is burping while a girl speaks", "a woman and man speak while food is frying"], "sample_ids": ["vdoxuJn9lTc", "zk-xJGQU8-4"], "start_seconds": ["40", "130"], "properties": ["person, burp, girl", "food, man, woman"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["a child speaks followed by a burp", "a woman is speaking while dishes are clanging and music is playing in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["wwyfGO2J4", "yDoT73BWsdA"], "start_seconds": ["90", "10"], "properties": ["people, applaud, hoot", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the 
steering wheel"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "people cheer as a vehicle engine revs"], "sample_ids": ["slZLHwNbbt4", "xjhAnI2q6hM"], "start_seconds": ["300", "6"], "properties": ["clap, distance, horn", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "a car accelerates and wind blows"], "sample_ids": ["zhx6hoYrHeI", "u0TrcHhkPQ"], "start_seconds": ["160", "20"], "properties": ["engine, sputter, rough", "accelerates, wind, blows"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "several insects fly while two men talk"], "sample_ids": ["yFB25fqfU8I", "s-T9OVOiMLo"], "start_seconds": ["300", "330"], "properties": ["wave, crash, shoreline", "several, fly, men"], "captions_pred_video": ["footage of a person surfing in the ocean", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more likely to be seen in a nature documentary", "label": 
1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "small dogs yip and bark sharply"], "sample_ids": ["xOZfdgAgJ9o", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["woman, whimpering, speaking", "bark, yip, sharply"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["t8CV69hcvF0", "xBxDz0CFVn0"], "start_seconds": ["210", "30"], "properties": ["person, sneeze, follow", "stream, water, flow"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "wind blows as people chatter quietly"], "sample_ids": ["vBslzh7saPw", "xBxDz0CFVn0"], "start_seconds": ["90", "30"], "properties": ["engine, roar, louder", "wind, chatter, people"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage is blurry and out of focus"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking with wind noise in the background "], "question": "which is quieter", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "an emergency vehicle engine runs then a horn blows and siren sounds"], "sample_ids": ["xZepNM9qcRA", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["background, motor, run", "engine, horn, siren"], "captions_pred_video": ["a close-up 
view of the motorcycle's engine and exhaust system", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn and siren?", "label": 1}, {"captions": ["a machine beeps continuously", "water splashes as an animal walks through"], "sample_ids": ["y682ml90jGw", "w1ir-sZ3Im8"], "start_seconds": ["11", "90"], "properties": ["beeps, machine, continuously", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a beeping sound is being made ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "a woman speaks happily and an animal chirps"], "sample_ids": ["vMf1dLD6Sng", "uWAAAL4CIoc"], "start_seconds": ["6", "0"], "properties": ["frog, bird, vocalize", "a woman, chirps, animal"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", null], "captions_pred_audio": ["a frog croaks loudly", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "vehicle engines race around a track as a man commentates"], "sample_ids": ["vZAw4apG0Es", "sZPuqDgX2V0"], "start_seconds": ["30", "30"], "properties": ["background, clock, ticktocks", "commentator, race, track"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking and a helicopter is flying overhead "], "question": "which entity is a video of a race?", "label": 1}, {"captions": ["a machine runs continuously", "an engine runs loudly"], "sample_ids": ["wdXV3Pv0jiY", "vqZuVbG6-HI"], "start_seconds": ["11", 
"130"], "properties": ["machine, running, continuously", "loud, engine, run"], "captions_pred_video": ["footage is blurry and shaky", "footage is blurry because it's raining outside"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a lawn mower is running and men are speaking "], "question": "which machine is running continuously", "label": 0}, {"captions": ["bees buzz as wind blows", "an aircraft engine runs"], "sample_ids": ["tMJne1a4AFI", "yLCORCnd35Q"], "start_seconds": ["0", "0"], "properties": ["bees, buzz, wind", "engine, aircraft, runs"], "captions_pred_video": ["a swarm of bees on the ground", "a lufthansa airbus a380 landing at london's heathrow airport"], "captions_pred_audio": ["a swarm of bees buzzing around", "a train is moving and its wheels are squealing "], "question": "which entity is moving", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["uJV8NDaHqqk", "vlS6YMeWAPo"], "start_seconds": ["100", "40"], "properties": ["loud, fly, chirp", "sheep, baa, birds"], "captions_pred_video": ["a bee hive in a wooden box", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a swarm of bees buzzing around", "a goat bleats and birds chirp"], "question": "which entity is a sheep?", "label": 1}, {"captions": ["vehicles pass by on a roadway", "a child speaks in closed space"], "sample_ids": ["tgbONvsP47Y", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["pass, vehicle, roadway", "child, space, speak"], "captions_pred_video": ["footage of a fire truck entering a garage", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a car is driving on the road ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["motor noise is followed by a horn honking 
and a siren wailing", "repeated tapping is accompanied by water running and a woman speaking softly"], "sample_ids": ["y2bVZ7rz-5M", "wvKpEYswXO0"], "start_seconds": ["280", "150"], "properties": ["motor noise, horn, siren", "sound, water, running"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity has a horn honking?", "label": 0}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "a motorcycle engine works nearby"], "sample_ids": ["sjlVMgdGSK0", "tOSWIURC-4"], "start_seconds": ["30", "0"], "properties": ["accelerates, vehicle, race car", "engine, work, nearby"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a lawn mower is running "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a person sniffs and sneezes", "an infant crying as a woman laughs"], "sample_ids": ["uRlbY6aoBU", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["sneezes, person, sniffs", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is sneezing ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 0}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "birds chirps while a siren signals in the distance"], "sample_ids": ["tDVADusiIoc", "uKCSGgof8gI"], "start_seconds": ["60", "12"], "properties": ["wind, radio, waves", "chirps, distance, signal"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a street in a small town on a sunny day"], 
"captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a truck is accelerating and revving its engine "], "question": "which entity is more quiet", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "several insects fly while two men talk"], "sample_ids": ["sLUnaPT5gM8", "s-T9OVOiMLo"], "start_seconds": ["0", "330"], "properties": ["loud, laughter, intermittent", "several, fly, men"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more likely to be a video of insects flying?", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a infant makes noise and is excited"], "sample_ids": ["uPDn2BFTHk", "wIJK3-5y0kA"], "start_seconds": ["140", "30"], "properties": ["lady, laugh, baby", "noise, excited, infant"], "captions_pred_video": [null, "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a male speaks over some small clicks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["uXxVebHsGZ8", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["male, clicks, speak", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "people applaud and hoot and chat quietly"], "sample_ids": ["ylpYOorfH4o", "wwyfGO2J4"], "start_seconds": ["410", "90"], "properties": ["motor, run, steady", "people, applaud, hoot"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["goats bleat and people speak", "water quietly rushes by while birds chirp in the background"], "sample_ids": ["z5iUE5h0EPs", "sYITalLZjj4"], "start_seconds": ["30", "30"], "properties": ["goats bleat, people speak, language", "water, rushes, background, birds"], "captions_pred_video": ["of the goat in the barn", "two ducks are swimming in the water near each other"], "captions_pred_audio": ["a goat bleats and a man speaks", "wind blows and birds chirp"], "question": "which entity is more 
quiet", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["uPDn2BFTHk", "vbZ-0lGPneg"], "start_seconds": ["140", "30"], "properties": ["lady, laugh, baby", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": ["an insect buzzes around continuously", "pigeons vocalize and birds chirp"], "sample_ids": ["v25l1jef3JY", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["buzzes, continuously, insect", "vocalize, bird, chirp"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of the pigeon in the cage"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a child speaks in closed space"], "sample_ids": ["zgUgkpk78xU", "yW6FWLSLkx4"], "start_seconds": ["70", "40"], "properties": ["clinking, humming, horn", "child, space, speak"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", 
"label": 1}, {"captions": ["a man speaks as horns blow", "a man speaks as a motor runs in the background"], "sample_ids": ["tHyNqRyK34A", "xZepNM9qcRA"], "start_seconds": ["24", "30"], "properties": ["a, man, speaks", "background, motor, run"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wvKpEYswXO0", "xfaoyyzw2WU"], "start_seconds": ["150", "180"], "properties": ["sound, water, running", "loud, jet engine, roar"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "a telephone rings followed by a woman talking"], "sample_ids": ["un9VQlzgZM", "tGcFnX0GHI"], "start_seconds": ["5", "0"], "properties": ["females, talk, laugh", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a horn blasts loudly as a train passes"], "sample_ids": ["vhJWZheqaE", "zsLxS-uLJTw"], "start_seconds": ["0", "20"], "properties": ["water drains unevenly, toilet flushes, water drains", "horn, blast, 
train"], "captions_pred_video": [null, "footage of the train on the tracks at sunrise or sunset"], "captions_pred_audio": ["a toilet is flushed", "a train blows its horn and moves on the tracks "], "question": "which entity is louder", "label": 1}, {"captions": ["a woman talking as an infant is crying", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tMbMDvT50j8", "tiDFTC-5vU"], "start_seconds": ["12", "30"], "properties": ["a, talk, infant", "male, duck, laugh"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking and ducks are quacking"], "question": "which entity is about a person talking?", "label": 0}, {"captions": ["a child and woman laughs and the woman speaks", "distant men speak as a spray can nozzle is depressed"], "sample_ids": ["uPDn2BFTHk", "rwtmaKiCcQU"], "start_seconds": ["140", "30"], "properties": ["woman, laughs, speaks", "nozzle, depressed, spray can"], "captions_pred_video": [null, "shows a man spraying paint on a wall with a spray gun"], "captions_pred_audio": ["a baby laughs and a woman speaks", "spraying and people speaking"], "question": "which entity is about a spray can?", "label": 1}, {"captions": ["a man speaks while water drains", "plastic is tapped on while someone speaks"], "sample_ids": ["vSeGhaZt-aI", "wvKpEYswXO0"], "start_seconds": ["50", "150"], "properties": ["water, drain, man", "plastic, tap, speak"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is a video of someone speaking?", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "pigeons vocalize and birds 
chirp"], "sample_ids": ["zofjfKhqLk8", "uiS58TNyUiw"], "start_seconds": ["10", "430"], "properties": ["background, metal, clings", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of the pigeon in the cage"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a power tool runs and touches a surface", "an infant crying as a woman laughs"], "sample_ids": ["zfvPRf3chY", "xhmRY9yhC7c"], "start_seconds": ["290", "20"], "properties": ["power tool, run, touch", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a baby cries and a woman speaks"], "question": "which is not a person", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a saw finishes running as metal clings in the background"], "sample_ids": ["wtDqrBygTcU", "zofjfKhqLk8"], "start_seconds": ["30", "10"], "properties": ["man, engine, run", "background, metal, clings"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "footage of a man using a machine to cut a piece of wood"], "captions_pred_audio": ["a man is speaking and a motor is running", "a large engine is running and a bell is ringing"], "question": "which entity is about a man speaking as a boat engine runs?", "label": 0}, {"captions": ["a toilet flushes and water sputters as it drains", "a clock alarm sounds and gears turn"], "sample_ids": ["smGI3C1NZc", "w2M4i1mklOA"], "start_seconds": ["30", "30"], "properties": ["water, drain, toilet", "alarm, gears, turn"], "captions_pred_video": [null, "footage of an antique clock"], "captions_pred_audio": ["a toilet is flushed", "a clock is ticking and a bell is ringing "], "question": "which entity is a clock?", "label": 1}, {"captions": 
["a person screams glaringly", "a propeller rotates loudly and intensely"], "sample_ids": ["xC8kbrKJmco", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["glaringly, screams, person", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a goat is bleating ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a person snoring several times", "a baby laugh at a sputter"], "sample_ids": ["spJCm8tD9Zo", "sLUnaPT5gM8"], "start_seconds": ["90", "0"], "properties": ["snore, person, several", "laugh, sputter, baby"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a person is snoring loudly", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is a baby?", "label": 1}, {"captions": ["a person is snoring while sleeping", "a child speaks in closed space"], "sample_ids": ["vJrjSeP17yE", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["a person is sleeping, snoring, person", "child, space, speak"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wyllXV6PjKo", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["a kid, talk, cry", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman speaks and a baby cries", "a duck quacks and a 
woman speaks"], "question": "which entity has a kid?", "label": 0}, {"captions": ["a person is snoring while sleeping", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vJrjSeP17yE", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["a person is sleeping, snoring, person", "men, talk, cars"], "captions_pred_video": ["a black background with a small plane flying in the sky", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a piece of wood is being placed down and sawed", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["uiItxDsDMFI", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["wood, piece, saw", "two men, woman, birds"], "captions_pred_video": ["a man cutting a log with an axe in the woods", null], "captions_pred_audio": ["a saw is being used with background noise ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a still image?", "label": 0}, {"captions": ["a stream of water flows as people talk and wind blows", "a car speeding up in the distance"], "sample_ids": ["xBxDz0CFVn0", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["stream, water, flow", "distance, car, speed"], "captions_pred_video": ["footage is blurry and 
out of focus", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["zj2R0XoFr5k", "sLUnaPT5gM8"], "start_seconds": ["50", "0"], "properties": ["airplane, fly, overhead", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a power tool runs and touches a surface", "a clock ticktocks"], "sample_ids": ["zfvPRf3chY", "v-g-j2uTByM"], "start_seconds": ["290", "30"], "properties": ["power tool, run, touch", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a clock is ticking loudly"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["a person snoring several times", "a person snores loudly multiple times at a close distance"], "sample_ids": ["spJCm8tD9Zo", "sSMl2vc3ek"], "start_seconds": ["90", "20"], "properties": ["snore, person, several", "loud, multiple, distance"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a person snoring loudly"], "question": "which person is snoring", "label": 1}, {"captions": ["a woman sneezes then speaks", "people speak and tapping occurs"], "sample_ids": ["x4dZyf9Gbj0", "tFCUUGdREgA"], "start_seconds": ["130", "70"], "properties": ["sneezes, speaks, 
woman", "people, tap, speak"], "captions_pred_video": ["footage is blurry and out of focus", "a person riding a white horse in an indoor arena"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking and walking with wind noise in the background "], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["scraping and female speech with distant music", "wind blowing followed by a zoom"], "sample_ids": ["yHeVV-xeOxQ", "vr8ZXjEBhMQ"], "start_seconds": ["130", "150"], "properties": ["female, speech, music", "wind, blow, zoom"], "captions_pred_video": ["of a girl milking a goat's udder", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a video of a wind blowing?", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a kid speaks followed by music playing"], "sample_ids": ["yaln9y8I7ms", "tQWGZLItBXk"], "start_seconds": ["230", "170"], "properties": ["female, flushes, toilet", "music, kid, speak"], "captions_pred_video": ["footage is blurry and out of focus", "worms revolution screenshots"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity has a female speaking?", "label": 0}, {"captions": ["a woman speaks happily and an animal chirps", "a man speaks while rain falls onto a hard surface"], "sample_ids": ["uWAAAL4CIoc", "wqN6IIHw3po"], "start_seconds": ["0", "30"], "properties": ["a woman, chirps, animal", "rain, surface, fall"], "captions_pred_video": [null, "in your own words what is happening in this screenshot? 
blood splattered all over the place"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man is speaking and water is splashing"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["yeFvk9x0wWI", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["chirp, twitter, clatter", "men, talk, cars"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a consistent ticking pattern", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sCeWURVHfOM", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["ticking, pattern, clock", "men, talk, cars"], "captions_pred_video": ["- a close-up view of the clock's inner workings", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["ticking of a clock", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a vehicle engine revs and tires squeal", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yDoT73BWsdA", "tDVADusiIoc"], "start_seconds": ["10", "60"], "properties": ["engine revs, tires squeal, vehicle", "water, radio, man"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["white noise and birds chirping", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wRBHTgrbiwg", "vb1fPSDI4c"], "start_seconds": ["50", "30"], "properties": ["noise, white, chirping", "multiple, people, yell"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a stream of water flows as people talk and wind blows", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["xBxDz0CFVn0", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["stream, water, flow", "People, motor, brakes"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking while a car is driving and a ticking sound is 
heard "], "question": "which entity is more likely to be in a car?", "label": 1}, {"captions": ["a toilet flushes and water drains", "paper is crumpling consistently"], "sample_ids": ["sfAvvZwdLCY", "v5cSxLaHADY"], "start_seconds": ["20", "0"], "properties": ["water drains, flushes, water", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a toilet is flushed", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a person is snoring while sleeping", "a car accelerates and wind blows"], "sample_ids": ["vJrjSeP17yE", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["a person is sleeping, snoring, person", "accelerates, wind, blows"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "birds chirp and a dog breathes heavily"], "sample_ids": ["yajyRTUQk3U", "y2ZBGpgbhHM"], "start_seconds": ["400", "30"], "properties": ["a woman, something, fried", "dog, chirp, breathe"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "birds chirping and a dog panting"], "question": "which entity is a dog", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "vehicles pass by on a roadway"], "sample_ids": ["y2ZBGpgbhHM", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["birds, tweet, pant", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds chirping and a dog panting", "a car 
is driving on the road "], "question": "which entity is more likely to be seen in a city", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a frog croaks as other frogs croak in the background"], "sample_ids": ["uEU-Hg5MTN8", "yswmmRZFItk"], "start_seconds": ["27", "0"], "properties": ["animal, grunts, snorts", "background, frog, croak"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a close up of a frog in the water"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a frog is croaking"], "question": "which animal is speaking", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "a clock ticktocks"], "sample_ids": ["sQGXqGcwOTc", "v-g-j2uTByM"], "start_seconds": ["3", "30"], "properties": ["audio, kid, giggles", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an airplane accelerates briefly", "an infant crying frantically"], "sample_ids": ["zjTG0gaGCUI", "zwOBqeFTgiU"], "start_seconds": ["80", "30"], "properties": ["accelerates, airplane, briefly", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["vehicles pass by on a roadway", "several insects fly while two men talk"], "sample_ids": ["tgbONvsP47Y", "s-T9OVOiMLo"], "start_seconds": ["0", "330"], "properties": ["pass, vehicle, roadway", "several, fly, men"], "captions_pred_video": ["footage of a fire truck entering a garage", "a man climbing up a tree using a rope to reach the 
top of the tree"], "captions_pred_audio": ["a car is driving on the road ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "distant men speak as a spray can nozzle is depressed"], "sample_ids": ["tQWGZLItBXk", "rwtmaKiCcQU"], "start_seconds": ["170", "30"], "properties": ["voice, music, whoosh", "nozzle, depressed, spray can"], "captions_pred_video": ["worms revolution screenshots", "shows a man spraying paint on a wall with a spray gun"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "spraying and people speaking"], "question": "which entity has a nozzle depressed?", "label": 1}, {"captions": ["motors runs briefly and tires screech", "water flows and trickles"], "sample_ids": ["yRx9txMcBl0", "tB7hWb9gTuQ"], "start_seconds": ["40", "30"], "properties": ["motors, tires, screech", "water, flow, trickle"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a car is revving its engine and skidding ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a person sniffles and then sneezes in the distance", "a man speaks as a motor 
runs in the background"], "sample_ids": ["uRlbY6aoBU", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["a, distance, sneeze", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is sneezing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vimzuGQvdcU", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["a, man, yells", "loud, multiple, distance"], "captions_pred_video": ["a group of people are rafting down a river", null], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["an engine runs and wind blows", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["vs65y4qmyBE", "x9JovgqUcs"], "start_seconds": ["340", "500"], "properties": ["engine, run, wind", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man speaks and types on a keyboard"], "question": "which entity is a person", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "wind blowing followed by a zoom"], "sample_ids": ["zofjfKhqLk8", "vr8ZXjEBhMQ"], "start_seconds": ["10", "150"], "properties": ["background, metal, clings", "wind, blow, zoom"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "wind blows and a chainsaw cuts through wood "], 
"question": "which entity is a zoom", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["u7C-AEBQM", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["ticks, rhythmic, quiet", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a ticktock of a clock", "a duck quacks and a woman speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a woman speaks and food sizzles while frying", "vehicles pass by on a roadway"], "sample_ids": ["wTideSjRFS0", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["food, sizzle, woman", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a stream of water runs briefly"], "sample_ids": ["zk-xJGQU8-4", "x-PeY8Yb8M4"], "start_seconds": ["130", "300"], "properties": ["food, man, woman", "stream, water, run"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["small dogs yip and bark sharply", "an insect buzzes around continuously"], "sample_ids": ["v-wcQf4BDY0", "v25l1jef3JY"], "start_seconds": ["120", "0"], "properties": ["bark, yip, sharply", "buzzes, continuously, insect"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what 
is happening", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a dog barks and growls", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "people applaud and hoot and chat quietly"], "sample_ids": ["t8CV69hcvF0", "wwyfGO2J4"], "start_seconds": ["210", "90"], "properties": ["person, sneeze, follow", "people, applaud, hoot"], "captions_pred_video": ["of an airplane flying in the dark sky at night", null], "captions_pred_audio": ["a woman sneezes and speaks", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be at a concert", "label": 1}, {"captions": ["a woman speaks and dog vocalizes", "a person is snoring while sleeping"], "sample_ids": ["uWAAAL4CIoc", "vJrjSeP17yE"], "start_seconds": ["0", "40"], "properties": ["a, dog, vocalize", "a person is sleeping, snoring, person"], "captions_pred_video": [null, "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a person snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "an small aircraft engine runs and a boy speaks"], "sample_ids": ["uYT5gxnyMWM", "xSKJGCItUWE"], "start_seconds": ["50", "10"], "properties": ["female, spraying, scream", "engine, run, boy"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a high pitched engine is running and a child speaks"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a clock ticks quietly and rhythmically", "a man speaks as a car is passing by"], "sample_ids": ["u7C-AEBQM", "sK4u5T8hW78"], "start_seconds": 
["30", "30"], "properties": ["ticks, rhythmic, quiet", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["a person is snoring while sleeping", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vJrjSeP17yE", "sLUnaPT5gM8"], "start_seconds": ["40", "0"], "properties": ["a person is sleeping, snoring, person", "loud, laughter, intermittent"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a person snoring loudly", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["water flows as men speak and yell", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["vJ7JPEFhyLA", "tDlysoZiA1I"], "start_seconds": ["16", "0"], "properties": ["water, flow, men", "animal, grunts, chirps"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "a man speaks and a rooster crows while men talk in the background"], 
"sample_ids": ["v5P-ThUCINM", "wz7N8YRy74I"], "start_seconds": ["400", "30"], "properties": ["background, chirp, bird", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "people speak as gunfire rings out"], "sample_ids": ["uYT5gxnyMWM", "wqTCwqVRDlk"], "start_seconds": ["50", "80"], "properties": ["person, spray, yell", "gunfire, ring, speak"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "dishes cling together then a man begins to speak"], "sample_ids": ["zdYdyF9-m8U", "sQGXqGcwOTc"], "start_seconds": ["7", "3"], "properties": ["wind, crash, shoreline", "cling, speak, dishes"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["waves crash and wind blows ", "mechanisms are operating and water is splashing "], "question": "which entity is a still image?", "label": 0}, {"captions": ["a man speaks while water trickles and flows", "a man speaks followed by another man speaking outside"], "sample_ids": ["sapQIQUhFc", "viuTg1M-dqg"], "start_seconds": ["280", "30"], "properties": ["water, trickles, flow", "two men, speak, follow"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a 
man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a horn honks and then loudly blares", "cats meow and then a person begins to talk while the cats continue to meow"], "sample_ids": ["wnpJndXuxLc", "x5cuQjOdM3E"], "start_seconds": ["50", "30"], "properties": ["horn, honk, loud", "cat, talk, meow"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a cat meows and a woman speaks"], "question": "which entity is quieter", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "a woman speaks in a fast tone with a male"], "sample_ids": ["w1mlz3Pe4fU", "sTpirNYo8vQ"], "start_seconds": ["300", "30"], "properties": ["vocalize, chirp, continuously", "a, tone, fast"], "captions_pred_video": ["of a bird in a cage", "of a man taking a selfie on a bus"], "captions_pred_audio": ["birds are chirping and singing", "a man is speaking while a car is revving and accelerating "], "question": "which entity is speaking", "label": 1}, {"captions": ["a person screams glaringly", "pigeons vocalize and birds chirp"], "sample_ids": ["xC8kbrKJmco", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["glaringly, screams, person", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a goat is bleating ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["uWPRNLnpy7Y", "tDVADusiIoc"], "start_seconds": ["10", "60"], "properties": ["accelerate, laugh, vehicle", "water, radio, man"], "captions_pred_video": ["is taken from a car driving down the 
street", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "a stream of water runs briefly"], "sample_ids": ["zhx6hoYrHeI", "x-PeY8Yb8M4"], "start_seconds": ["160", "300"], "properties": ["engine, sputter, rough", "stream, water, run"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "sawing of wood and rustling with leaves blowing in the distance"], "sample_ids": ["ukxt9I7eMMg", "uiItxDsDMFI"], "start_seconds": ["30", "30"], "properties": ["continuous, woman, speaking", "sound, distance, leaves"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a saw is being used with background noise "], "question": "which entity has a woman speaking towards the end?", "label": 0}, {"captions": ["multiple people speak and children yell while water gurgles", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vb1fPSDI4c", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["multiple, people, yell", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a game", 
"label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["zO-LSSY92ZM", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["liquid, surface, sound", "engine, laugh, loud"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", "footage of a man driving a car in the dark"], "captions_pred_audio": ["steam is hissing and hissing", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "people cheer as a vehicle engine revs"], "sample_ids": ["tDlfY3nmx1A", "xjhAnI2q6hM"], "start_seconds": ["160", "6"], "properties": ["applause, laugh, man", "engine revs, vehicle, people"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a truck is revving its engine and a man is speaking "], "question": "which entity 
is about a vehicle?", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "an infant crying frantically"], "sample_ids": ["wwyfGO2J4", "zwOBqeFTgiU"], "start_seconds": ["90", "30"], "properties": ["people, applaud, hoot", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "pigeons vocalize and birds chirp"], "sample_ids": ["ul60S8TXDA8", "uiS58TNyUiw"], "start_seconds": ["60", "430"], "properties": ["sound, distance, bell", "vocalize, bird, chirp"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "of the pigeon in the cage"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a man speaks as a motor runs in the background"], "sample_ids": ["vbr9mHKc8WM", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["noise, loudness, engine", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["an engine is idling", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is quieter", "label": 0}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "an electric engine works nearby followed by a child talking"], "sample_ids": ["tQWGZLItBXk", "xSKJGCItUWE"], "start_seconds": ["170", "10"], "properties": ["music, person, ding", "engine, work, child"], "captions_pred_video": ["worms revolution screenshots", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a child speaks music 
plays video game sounds sound effects and sound effects play ", "a high pitched engine is running and a child speaks"], "question": "which entity has a child talking?", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "birds chirp and an insect buzzes around"], "sample_ids": ["su6FAOcOA8c", "t97k0cejSQE"], "start_seconds": ["4", "250"], "properties": ["engine, run, woman", "bird, chirp, insect"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a bee on a purple thistle flower"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a bee buzzes and a woman speaks"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a dog barks and whimpers", "a crowd yells, reacts and applauds"], "sample_ids": ["sShpyu2l4YQ", "wztCSUxOf8"], "start_seconds": ["0", "130"], "properties": ["barks, whimpers, dog", "a crowd, yells, applauds"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking and a crowd is clapping"], "question": "which entity is more likely to be a crowd", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "birds chirp and objects are moved around"], "sample_ids": ["wRBHTgrbiwg", "yPUYU6t3rwo"], "start_seconds": ["50", "370"], "properties": ["birds, chirp, cooing", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a stream runs then someone speaks", "a car accelerates and wind blows"], "sample_ids": ["wbHTKEJZyhc", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["stream, 
run, someone", "accelerates, wind, blows"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", null], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a door slams shut roughly", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["zkKdxzNC97Y", "zj2R0XoFr5k"], "start_seconds": ["27", "50"], "properties": ["a door, slams, shut", "airplane, boy, fly"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a door is opened and closed", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a male is 
speaking and a duck quacks as others laugh", "birds chirp and objects are moved around"], "sample_ids": ["tiDFTC-5vU", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["male, duck, laugh", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a car accelerates and wind blows"], "sample_ids": ["sG7TyPnFDR0", "u0TrcHhkPQ"], "start_seconds": ["180", "20"], "properties": ["beeps, machine, smoke alarm", "accelerates, wind, blows"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", null], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "a steam engine runs and whistles as it passes by"], "sample_ids": ["sTpirNYo8vQ", "se87d6yxEOA"], "start_seconds": ["30", "10"], "properties": ["a, tone, fast", "run, whistle, pass"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage of a train passing by a train station with smoke billowing out of the train's smokestack"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a train is moving and blowing its whistle "], "question": "which entity is moving", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "some men converse over an engine running"], "sample_ids": ["vXlk0lIQBFo", "sCiy7QS1U"], "start_seconds": ["470", "300"], "properties": ["wind, talk, vocalize", "men, converse, engine"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", null], "captions_pred_audio": ["wind 
chimes are ringing and people are speaking and laughing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a conversation?", "label": 1}, {"captions": ["continuous sneezing together with speech", "people speak as gunfire rings out"], "sample_ids": ["x4dZyf9Gbj0", "wqTCwqVRDlk"], "start_seconds": ["130", "80"], "properties": ["continuous, sneeze, speech", "gunfire, ring, speak"], "captions_pred_video": ["footage is blurry and out of focus", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "paper is crumpling consistently"], "sample_ids": ["vYkA3cfXp5Q", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["engine, accelerate, idle", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["an engine is idling", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a man speaks as a motor runs in the background"], "sample_ids": ["w0xsN8X18Y", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["music, surface, rain", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks as a car is passing by", "an infant crying frantically"], "sample_ids": ["sK4u5T8hW78", "zwOBqeFTgiU"], "start_seconds": 
["30", "30"], "properties": ["a, car, pass", "cry, infant, frantically"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["an insect buzzes around continuously", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["v25l1jef3JY", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["buzzes, continuously, insect", "a woman, laughs, animal"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a woman is speaking and a baby is crying"], "question": "which entity is not a person?", "label": 0}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["x5cuQjOdM3E", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["cat, talk, meow", "water, radio, man"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "a loud snarling engine is followed by 
a man laughing"], "sample_ids": ["yeFvk9x0wWI", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["chirp, twitter, clatter", "engine, laugh, loud"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yYEVLuqEytU", "vbZ-0lGPneg"], "start_seconds": ["40", "30"], "properties": ["animal, pig, background", "a woman, a television program, a bird"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sfAvvZwdLCY", "wDVMhEdTiVw"], "start_seconds": ["20", "30"], "properties": ["flushes, drains, water", "gun, shoot, water"], "captions_pred_video": ["footage of the toilet in the bathroom", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a toilet is flushed", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity has water sloshing nearby?", "label": 1}, {"captions": ["vehicles pass by on a roadway", "birds tweet and squawk"], "sample_ids": ["tgbONvsP47Y", "w1mlz3Pe4fU"], "start_seconds": ["0", "300"], "properties": ["pass, vehicle, roadway", "squawk, tweet, scream"], "captions_pred_video": ["footage of a fire truck entering a garage", "of a bird in a cage"], "captions_pred_audio": ["a car is 
driving on the road ", "birds are chirping and singing"], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["y2ZBGpgbhHM", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["birds, tweet, pant", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds chirping and a dog panting", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about animals?", "label": 0}, {"captions": ["a man sprays as a scraping occurs in the background", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sOa7g-44Dag", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["background, man, spray", "engine, idle, woman"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a person is snoring while sleeping", "plastic is tapped on while someone speaks"], "sample_ids": ["vJrjSeP17yE", "wvKpEYswXO0"], "start_seconds": ["40", "150"], "properties": ["a person is sleeping, snoring, person", "plastic, tap, speak"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["a person screams glaringly", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["xC8kbrKJmco", 
"su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["glaringly, screams, person", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a goat is bleating ", "a woman is speaking and a subway train is moving "], "question": "which entity is a person", "label": 0}, {"captions": ["a stream of water runs briefly", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["x-PeY8Yb8M4", "uYT5gxnyMWM"], "start_seconds": ["300", "50"], "properties": ["stream, water, run", "female, spraying, scream"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car is driving on a wet road ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person spraying and screaming?", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "a stream of water runs briefly"], "sample_ids": ["vfYTJq7nU", "x-PeY8Yb8M4"], "start_seconds": ["130", "300"], "properties": ["ducks, quack, man", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a woman sneezes then speaks", "a stream of water runs briefly"], "sample_ids": ["x4dZyf9Gbj0", "x-PeY8Yb8M4"], "start_seconds": ["130", "300"], "properties": ["sneezes, speaks, woman", "stream, water, run"], "captions_pred_video": ["footage is blurry and out of focus", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman sneezes and speaks", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a cat meows and children speak", "a frog croaks as other frogs croak 
in the background"], "sample_ids": ["x5cuQjOdM3E", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["cat, speak, children", "background, frog, croak"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a close up of a frog in the water"], "captions_pred_audio": ["a cat meows and a woman speaks", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "some men converse over an engine running"], "sample_ids": ["vbZ-0lGPneg", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["a woman, a television program, a bird", "men, converse, engine"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has more people", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "waves crash against a shoreline and people speak"], "sample_ids": ["sSMl2vc3ek", "yFB25fqfU8I"], "start_seconds": ["20", "300"], "properties": ["a person, laughs, snores", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "wind blowing followed by a zoom"], "sample_ids": ["sU53zg9Jp7s", "vr8ZXjEBhMQ"], "start_seconds": ["380", "150"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "wind, blow, zoom"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["birds chirp and a doorbell 
rings with breathing and music in the background ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to cause a woman to gasp", "label": 0}, {"captions": ["a vehicle accelerates and squeals tires", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["yRx9txMcBl0", "wDVMhEdTiVw"], "start_seconds": ["40", "30"], "properties": ["accelerates, tires, squeals", "gun, shoot, water"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a gun", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "a woman speaks as she rubs two objects together"], "sample_ids": ["s7knHCFW82w", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["blow horn, get close, train", "two objects, woman, speak"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to 
make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a duck quacks continuously"], "sample_ids": ["sShpyu2l4YQ", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["growl, bark, yip", "quacks, continuously, duck"], "captions_pred_video": ["the puppies are playing with a toy", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a dog is barking and growling", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a telephone rings followed by a woman talking"], "sample_ids": ["y2bVZ7rz-5M", "tGcFnX0GHI"], "start_seconds": ["280", "0"], "properties": ["motor noise, horn, siren", "ring, talk, woman"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is followed by a woman talking", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a man laughs and speaks as cats purr and hiss"], "sample_ids": ["u2f5NpsoHBg", "vVhthZ45k3Y"], "start_seconds": ["30", 
"30"], "properties": ["person, laugh, clap", "cat, purr, hiss"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a man is speaking and a cat is meowing"], "question": "which entity is a person", "label": 0}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["w2JXXIAdUdg", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["snoring, distance, person", "a woman, laughs, animal"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a woman is speaking and a baby is crying"], "question": "which entity has a person snoring nearby?", "label": 0}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "people cheer as a vehicle engine revs"], "sample_ids": ["tDVADusiIoc", "xjhAnI2q6hM"], "start_seconds": ["60", "6"], "properties": ["water, radio, man", "engine revs, vehicle, people"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "an airplane engine runs"], "sample_ids": ["zj2R0XoFr5k", "yVPZ2MNWpms"], "start_seconds": ["50", "0"], "properties": ["airplane, fly, overhead", "engine, airplane, runs"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of an airport with planes 
parked on the tarmac"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a car is driving by on the road "], "question": "which airplane is flying overhead", "label": 0}, {"captions": ["a man yells and speaks as water splashes", "water pouring and bubbling"], "sample_ids": ["vimzuGQvdcU", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["a, man, yells", "water, bubbles, pouring"], "captions_pred_video": ["a group of people are rafting down a river", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "water is running from a faucet"], "question": "which entity is a video of water pouring and bubbling?", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "vehicles pass by on a roadway"], "sample_ids": ["tEE3MpBt1sg", "tgbONvsP47Y"], "start_seconds": ["50", "0"], "properties": ["drill, something, laugh", "pass, vehicle, roadway"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of a fire truck entering a garage"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, 
{"captions": ["a woman speaks and food sizzles while frying", "a man speaks and is typing on a keyboard"], "sample_ids": ["wTideSjRFS0", "x9JovgqUcs"], "start_seconds": ["30", "500"], "properties": ["food, sizzle, woman", "a, man, speaks, keyboard"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man speaks and types on a keyboard"], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a stream of water runs briefly"], "sample_ids": ["sG7TyPnFDR0", "x-PeY8Yb8M4"], "start_seconds": ["180", "300"], "properties": ["beeps, machine, smoke alarm", "stream, water, run"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yVumC9TGknc", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["humming, clock, birds", "three men, wind, flow"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a series of beeps and chirps", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["children speak as a female ask them questions", "people applaud and hoot and chat quietly"], "sample_ids": ["wEBlkGWVWwE", "wwyfGO2J4"], "start_seconds": ["260", "90"], "properties": ["female, speak, questions", "people, applaud, hoot"], "captions_pred_video": 
["shows a person writing on the whiteboard", null], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sofxkNWaP0s", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["wind, engine, louder", "loud, laughter, intermittent"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a crowd yells, reacts and applauds", "a man speaks followed by another man speaking outside"], "sample_ids": ["wztCSUxOf8", "viuTg1M-dqg"], "start_seconds": ["130", "30"], "properties": ["a crowd, yells, applauds", "two men, speak, follow"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two men speaking?", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vmrxwuAMb2I", "tDVADusiIoc"], "start_seconds": ["40", "60"], "properties": ["a dog, inhales, exhales", "water, radio, man"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a dog barks and growls", "a man is speaking while the wind is 
blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a telephone rings followed by a woman talking"], "sample_ids": ["zFjIWfSD-4", "tGcFnX0GHI"], "start_seconds": ["410", "0"], "properties": ["People, motor, brakes", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "water pouring and bubbling"], "sample_ids": ["vSeGhaZt-aI", "uyRfq-jKPpo"], "start_seconds": ["50", "50"], "properties": ["water, bubbles, speak", "water, bubbles, pouring"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "water is running from a faucet"], "question": "which entity is more likely to be in a bathroom", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "a saw finishes running as metal clings in the background"], 
"sample_ids": ["tqR406bGiE", "zofjfKhqLk8"], "start_seconds": ["40", "10"], "properties": ["flush, water, gurgle", "background, metal, clings"], "captions_pred_video": [null, "footage of a man using a machine to cut a piece of wood"], "captions_pred_audio": ["a toilet is flushed", "a large engine is running and a bell is ringing"], "question": "which entity is about a flushing toilet?", "label": 0}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "roadway noise occurs and a truck accelerates"], "sample_ids": ["su6FAOcOA8c", "tgbONvsP47Y"], "start_seconds": ["4", "0"], "properties": ["engine, idle, woman", "noise, truck, accelerate"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a train horn blows as it passes by"], "sample_ids": ["vbr9mHKc8WM", "zVacuqSb4LI"], "start_seconds": ["40", "30"], "properties": ["noise, loudness, engine", "horn, blows, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video 
by"], "captions_pred_audio": ["an engine is idling", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["an animal quacks rapidly", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vh30P49Po6s", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["animal, quacks, rapidly", "three men, wind, flow"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a person", "label": 0}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["smDKStoHBJo", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["a, infant, speaking", "engine, idle, woman"], "captions_pred_video": ["a man holding a crying baby in his arms", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking to an infant?", "label": 0}, {"captions": ["a clock ticks quietly and rhythmically", "a dog barks and whimpers"], "sample_ids": ["u7C-AEBQM", "sShpyu2l4YQ"], "start_seconds": ["30", "0"], "properties": ["ticks, rhythmic, quiet", "barks, whimpers, dog"], "captions_pred_video": [null, "the puppies are playing with a toy"], "captions_pred_audio": ["a ticktock of a clock", "a dog is barking and growling"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks as music plays before artillery is fired", "a person speaks briefly"], "sample_ids": ["vcmWSmvti8", "zOZleIRqZm4"], "start_seconds": ["30", "80"], "properties": ["music, man, fire", "person, 
talk, brief"], "captions_pred_video": [null, "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a man is speaking with crickets chirping in the background"], "question": "which entity is more like a talk show", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "water pouring and bubbling"], "sample_ids": ["yLy-WycbVVE", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["background, people, talk", "water, bubbles, pouring"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "water is running from a faucet"], "question": "which entity is more silent", "label": 1}, {"captions": ["an insect buzzes around continuously", "an insect buzzes around continuously"], "sample_ids": ["v25l1jef3JY", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["buzzes, continuously, insect", "buzzes, continuously, insect"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "a black background with a cartoon character in the 
foreground"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a fly is buzzing around a microphone "], "question": "which insect buzzes around continuously", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "an infant crying frantically"], "sample_ids": ["vlJS7LN2XyM", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["background, clocks, ticking", "cry, infant, frantically"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "of the baby crying in the car seat"], "captions_pred_audio": ["a ticktock of a clock", "a baby cries loudly"], "question": "which entity is more active", "label": 1}, {"captions": ["ticking continues without interruption", "multiple people speak and children yell while water gurgles"], "sample_ids": ["v-g-j2uTByM", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["ticking, continuous, clock", "multiple, people, yell"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", null], "captions_pred_audio": ["a clock is ticking loudly", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a kid speaks followed by music playing", "a woman speaks and is crumpling paper"], "sample_ids": ["tQWGZLItBXk", "xvDdE3zNf8Y"], "start_seconds": ["170", "120"], "properties": ["music, kid, speak", "A, crumple, paper"], "captions_pred_video": ["worms revolution screenshots", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman speaks and crumples paper"], "question": "which entity is crumpling paper", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "a stream of water runs briefly"], "sample_ids": 
["yYJksgsxx5U", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["audio, woman, silverware", "stream, water, run"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "a car accelerates and wind blows"], "sample_ids": ["xyL9F5VrjkE", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["wind, blows, vehicle", "accelerates, wind, blows"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wvKpEYswXO0", "su6FAOcOA8c"], "start_seconds": ["150", "4"], "properties": ["plastic, tap, speak", "engine, idle, woman"], "captions_pred_video": ["of the person preparing food in the kitchen", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "an infant crying frantically"], "sample_ids": ["sShpyu2l4YQ", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["growl, bark, yip", "cry, infant, frantically"], "captions_pred_video": ["the puppies are playing with a toy", "of the baby crying in the car seat"], "captions_pred_audio": ["a dog is barking and growling", "a baby cries loudly"], "question": "which entity is crying", 
"label": 1}, {"captions": ["a woman speaks with water running", "a car accelerates and wind blows"], "sample_ids": ["wTideSjRFS0", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["water, running, woman", "accelerates, wind, blows"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "people speak as gunfire rings out"], "sample_ids": ["sYITalLZjj4", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["water, rushes, background, birds", "gunfire, ring, speak"], "captions_pred_video": ["two ducks are swimming in the water near each other", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["wind blows and birds chirp", "a man is speaking and a gun is fired"], "question": "which entity is more quiet", "label": 0}, {"captions": ["scraping and female speech with distant music", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["yHeVV-xeOxQ", "uYT5gxnyMWM"], "start_seconds": ["130", "50"], "properties": ["female, speech, music", "a, scream, girl"], "captions_pred_video": ["of a girl milking a goat's udder", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream", "label": 1}, {"captions": ["people speak then an engine runs", "a man speaks as a motor runs in the background"], "sample_ids": ["uMTTDZ2mb4", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["engine, run, people", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], 
"captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sOa7g-44Dag", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["background, man, spray", "rooster, crow, background, men"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a woman and man are speaking", "an animal quacks rapidly"], "sample_ids": ["vbpKkWvfOu4", "vh30P49Po6s"], "start_seconds": ["560", "30"], "properties": ["two people, speaking, woman, man", "animal, quacks, rapidly"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a duck is quacking loudly"], "question": "which entity is speaking", "label": 0}, {"captions": ["birds chirps while a siren signals in the distance", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["uKCSGgof8gI", "su6FAOcOA8c"], "start_seconds": ["12", "4"], "properties": ["chirps, distance, signal", "engine, idle, woman"], "captions_pred_video": ["footage of a street in a small town 
on a sunny day", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "a motor noise is accompanied by a door opening and closing"], "sample_ids": ["w5W5Kqtc8E", "vBHyYJ8pL0"], "start_seconds": ["100", "2"], "properties": ["wind, blow, vehicle", "noise, door, opening"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is accompanied by a door opening and closing?", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "a child speaks in closed space"], "sample_ids": ["w2JXXIAdUdg", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["emits, sleeping, person", "child, space, speak"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "water splashing and a person laughs in the distance then a man speaks nearby"], "sample_ids": ["zliInBdC98Y", "vddP56-ogds"], "start_seconds": ["30", "30"], "properties": ["a, baby, cries, wails", "water, splash, person, laugh"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", null], "captions_pred_audio": ["a baby cries and a woman speaks", "water is running and gurgling and a man is speaking"], "question": "which entity is more active", "label": 1}, {"captions": ["multiple beeps 
are followed by a squawk and a child speaking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["w34HjHr6gAY", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["beeps, squawk, child speaking", "stream, water, flow"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "footage is blurry and out of focus"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking with wind noise in the background "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "an infant crying as a woman laughs"], "sample_ids": ["vveS8HT7Uog", "xhmRY9yhC7c"], "start_seconds": ["100", "20"], "properties": ["a man, objects, speak", "a, laugh, infant"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["w34HjHr6gAY", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["beeps, squawk, child speaking", "loud, laughter, intermittent"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard 
of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an electronic device bleeps once", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tHJ6JSa8Y4", "vYkA3cfXp5Q"], "start_seconds": ["0", "30"], "properties": ["bleeps, electronic, device", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a clock is ticking and beeping", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "wind blowing and birds chirping with the distant cooing of a large bird"], "sample_ids": ["uZesmtKZGSw", "wRBHTgrbiwg"], "start_seconds": ["250", "50"], "properties": ["men, talk, cars", "birds, chirp, cooing"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", 
"of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "birds are chirping and insects are buzzing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a child speaks", "after a few seconds of silence, a loud bang occurs followed by a softer banging noise"], "sample_ids": ["yW6FWLSLkx4", "zkKdxzNC97Y"], "start_seconds": ["40", "27"], "properties": ["a, child, speaks", "loud, bang, noise"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a door is opened and closed"], "question": "which entity is silent", "label": 1}, {"captions": ["some men converse over an engine running", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["sCiy7QS1U", "ziUT9IFTkjg"], "start_seconds": ["300", "10"], "properties": ["men, converse, engine", "background, birds, rustling"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "vehicles pass by on a roadway"], "sample_ids": ["xM4joTqDVp4", "tgbONvsP47Y"], "start_seconds": ["160", "0"], "properties": ["background, chirp, birds", "pass, vehicle, roadway"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a car is driving on the road "], "question": "which entity is a video of a train chugging?", "label": 0}, {"captions": ["a man speaks over 
intermittent keyboard taps", "a clicking followed by some people laughing and a kid speaking"], "sample_ids": ["tw76HGONaKg", "vz8868znkVQ"], "start_seconds": ["570", "60"], "properties": ["audio, man, keyboard", "audio, click, kid speaking"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "a video of a plane flying over a cloudy sky"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a baby is laughing and breathing with background noise "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 0}, {"captions": ["water flows followed by women screaming", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["w5W5Kqtc8E", "tiDFTC-5vU"], "start_seconds": ["100", "30"], "properties": ["water, flow, women", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking and ducks are quacking"], "question": "which entity is a group of people", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "someone snores nearby"], "sample_ids": ["siJFXfGWgDk", "spJCm8tD9Zo"], 
"start_seconds": ["50", "90"], "properties": ["a, bird, vehicle", "someone snores, nearby, someone"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vzxHnu-SFEw", "sSMl2vc3ek"], "start_seconds": ["80", "20"], "properties": ["two objects, woman, speak", "loud, multiple, distance"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["an adult woman and an adult man speak", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["zTLVJCo4WEE", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["two people, 
adult, speak", "beeps, hit, woman"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "wind blows strongly"], "sample_ids": ["sncRqQ67iJU", "w8uLijTqtlU"], "start_seconds": ["460", "70"], "properties": ["loud, repeatedly, man", "wind, blows, strongly"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "footage is blurry and shaky"], "captions_pred_audio": ["a person is snoring", "the wind is blowing strongly"], "question": "which entity is not a person", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "several insects fly while two men talk"], "sample_ids": ["zofjfKhqLk8", "s-T9OVOiMLo"], "start_seconds": ["10", "330"], "properties": ["background, metal, clings", "several, fly, men"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more flying insects", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "bees buzz and wind blows"], "sample_ids": ["sWZzXuWYY", 
"tMJne1a4AFI"], "start_seconds": ["420", "0"], "properties": ["male, clanks, thumps", "bees buzz, wind blows, bees"], "captions_pred_video": [null, "a swarm of bees on the ground"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a swarm of bees buzzing around"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["y2ZBGpgbhHM", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["dog, chirp, breathe", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a crow?", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vs65y4qmyBE", "tDVADusiIoc"], "start_seconds": ["340", "60"], "properties": ["wind, blows, strongly", "water, radio, man"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "several insects fly while two men talk"], "sample_ids": ["sxYkFKFIZD0", "s-T9OVOiMLo"], "start_seconds": ["20", "330"], "properties": ["screech, man, door", "several, fly, men"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 
audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a beep occurs briefly", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["xtWeJ56-U-g", "sLUnaPT5gM8"], "start_seconds": ["20", "0"], "properties": ["beep, occur, briefly", "loud, laughter, intermittent"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a stream of water runs briefly", "people applaud and hoot and chat quietly"], "sample_ids": ["x-PeY8Yb8M4", "wwyfGO2J4"], "start_seconds": ["300", "90"], "properties": ["stream, water, run", "people, applaud, hoot"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", null], "captions_pred_audio": ["a car is driving on a wet road ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["ukg5L09Wpvo", "tdWhHV3X25Q"], "start_seconds": ["150", "60"], 
"properties": ["a train, a horn, a bell", "applause, audience, yells"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "a person snores loudly multiple times at a close distance"], "sample_ids": ["ylpYOorfH4o", "sSMl2vc3ek"], "start_seconds": ["410", "20"], "properties": ["engine, running, wind", "loud, multiple, distance"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a person snoring several times", "plastic is tapped on while someone speaks"], "sample_ids": ["spJCm8tD9Zo", "wvKpEYswXO0"], "start_seconds": ["90", "150"], "properties": ["snore, person, several", "plastic, tap, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and tapping with background noise and water running "], "question": 
"which entity is tapped on while someone speaks", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "a stream of water flows as people talk and wind blows"], "sample_ids": ["x5cuQjOdM3E", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["cat, talk, meow", "stream, water, flow"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage is blurry and out of focus"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wz7N8YRy74I", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["rooster, crow, background, men", "a woman, laughs, animal"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a woman is speaking and a baby is crying"], "question": "which entity has a rooster?", "label": 0}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "people applaud and hoot and chat quietly"], "sample_ids": ["wRBHTgrbiwg", "wwyfGO2J4"], "start_seconds": ["50", "90"], "properties": ["birds, chirp, cooing", "people, applaud, hoot"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person speaks briefly", "a clock ticktocks"], "sample_ids": ["zOZleIRqZm4", "v-g-j2uTByM"], "start_seconds": ["80", "30"], "properties": ["person, talk, brief", "ticktocks, clock, 
ticktocks"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an audience gives applause as a man yells and a group sings", "people speak as gunfire rings out"], "sample_ids": ["tdWhHV3X25Q", "wqTCwqVRDlk"], "start_seconds": ["60", "80"], "properties": ["applause, audience, yells", "gunfire, ring, speak"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "crowd applause while a guy laughs followed by another man speaking"], "sample_ids": ["xSKJGCItUWE", "tDlfY3nmx1A"], "start_seconds": ["10", "160"], "properties": ["engine, work, child", "applause, laugh, man"], "captions_pred_video": ["footage of the helicopter flying in the room", "a man in a suit and tie is talking to another man in a suit and tie"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a crowd is clapping and laughing and a man is speaking "], "question": "which entity is a performance", "label": 1}, {"captions": ["a dog barks and whimpers", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sShpyu2l4YQ", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "rooster, crow, background, men"], "captions_pred_video": ["the puppies are playing with a toy", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a dog is barking and growling", "a man is 
speaking birds are chirping and a rooster is crowing "], "question": "which entity is a bird?", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a man speaks as a car is passing by"], "sample_ids": ["sAam2NqGhLY", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["snoring, breathing, child", "a, car, pass"], "captions_pred_video": ["of a little girl sleeping on a couch", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person is snoring", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "paper folding and crinkling"], "sample_ids": ["tEE3MpBt1sg", "zPpG3RD8lSs"], "start_seconds": ["50", "20"], "properties": ["drill, something, laugh", "paper, fold, crinkle"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube 
how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "the wind blows and a mouse clicks "], "question": "which is not a drill", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["slZLHwNbbt4", "zj2R0XoFr5k"], "start_seconds": ["300", "50"], "properties": ["train, horn, sound", "airplane, boy, fly"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "small dogs yip and bark sharply"], "sample_ids": ["s59PfAghdkM", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["bird, chirp, background, horse, neigh", "bark, yip, sharply"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a toilet flushes and water drains", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["sfAvvZwdLCY", "tw76HGONaKg"], "start_seconds": ["20", "570"], "properties": ["water drains, flushes, water", "A, game, keyboard"], "captions_pred_video": ["footage of the toilet in the bathroom", "game you 
are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a toilet is flushed", "a man speaks and types on a computer keyboard "], "question": "which object is a source of water", "label": 0}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "water flows as men speak and yell"], "sample_ids": ["w9lpbUn0hPc", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["male, wind, rustling", "water, flow, men"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more like a stream", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "water flows and trickles"], "sample_ids": ["uJV8NDaHqqk", "tB7hWb9gTuQ"], "start_seconds": ["100", "30"], "properties": ["loud, fly, chirp", "water, flow, trickle"], "captions_pred_video": ["a bee hive in a wooden box", "the rocks on the beach are surrounded by water and the sky is visible in 
the background"], "captions_pred_audio": ["a swarm of bees buzzing around", "water is splashing and gurgling"], "question": "which entity is not loud", "label": 1}, {"captions": ["a small engine idles continuously", "a man speaks as a motor runs in the background"], "sample_ids": ["y5WII6cTH7k", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["engine, idle, continuously", "background, motor, run"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vh30P49Po6s", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["loud, continuous, quacks", "three men, wind, flow"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a duck?", "label": 1}, {"captions": ["wind blows and a stream of water flows nearby", "birds chirp and objects are moved around"], "sample_ids": ["sYITalLZjj4", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["stream, flow, wind", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["two ducks are swimming in the water near each other", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["wind blows and birds chirp", "insects buzz and a man speaks"], "question": "which entity is moving objects around", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a stream of water 
flows as people talk and wind blows"], "sample_ids": ["w2M4i1mklOA", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["alarm, gears, turn", "stream, water, flow"], "captions_pred_video": ["footage of an antique clock", "footage is blurry and out of focus"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a woman speaks as she rubs two objects together"], "sample_ids": ["zl9Dqx-j7q4", "vzxHnu-SFEw"], "start_seconds": ["6", "80"], "properties": ["motors rev, laugh, loudly", "two objects, woman, speak"], "captions_pred_video": ["footage of a man driving a car in the dark", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a jet engine roars ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "an infant crying frantically"], "sample_ids": ["uWPRNLnpy7Y", "zwOBqeFTgiU"], 
"start_seconds": ["10", "30"], "properties": ["accelerate, laugh, vehicle", "cry, infant, frantically"], "captions_pred_video": ["is taken from a car driving down the street", "of the baby crying in the car seat"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "someone snores nearby"], "sample_ids": ["xzKKf9bKNUo", "spJCm8tD9Zo"], "start_seconds": ["10", "90"], "properties": ["background, noise, snoring", "someone snores, nearby, someone"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a person snoring loudly", "a person is snoring loudly"], "question": "which entity is a recording of snoring?", "label": 0}, {"captions": ["a man speaks as music plays before artillery is fired", "a propeller rotates loudly and intensely"], "sample_ids": ["vcmWSmvti8", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["music, man, fire", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["x5cuQjOdM3E", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["cat, meows, young woman", "engine, laugh, loud"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a cat meows and a woman speaks", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a telephone rings followed by a woman 
talking", "a child speaks in closed space"], "sample_ids": ["tGcFnX0GHI", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["ring, talk, woman", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a child speaking in a closed space?", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "paper is crumpling consistently"], "sample_ids": ["sSMl2vc3ek", "v5cSxLaHADY"], "start_seconds": ["20", "0"], "properties": ["loud, multiple, distance", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a person snoring loudly", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "wind blows as people chatter quietly"], "sample_ids": ["spYNpeN7rPY", "xBxDz0CFVn0"], "start_seconds": ["1", "30"], "properties": ["a clock, ticktock, man", "wind, chatter, people"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["yaln9y8I7ms", "vb1fPSDI4c"], "start_seconds": ["230", "30"], "properties": ["female, flushes, toilet", "multiple, people, yell"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and a man speaks", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a door opens and birds chirp", "a child yells and another yells"], "sample_ids": ["yeFvk9x0wWI", "vMDHu7Lxcgw"], "start_seconds": ["30", "410"], "properties": ["door, open, birds", "two, yell, child"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "a boy playing 
on a trampoline in the backyard"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a woman is speaking and a child is shouting"], "question": "which entity is more likely to be a scream", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "an infant crying as a woman laughs"], "sample_ids": ["yZp6xizR0yU", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["animal, bleat, cry", "a, laugh, infant"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a baby cries and a woman speaks"], "question": "which entity is crying", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["y8WEcpOlT3I", "tDVADusiIoc"], "start_seconds": ["40", "60"], "properties": ["wind, speak, buffeting", "water, radio, man"], "captions_pred_video": ["on how to use a sewing machine youtube", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "an infant crying as a woman laughs"], "sample_ids": ["xKB8O8LTs6s", "xhmRY9yhC7c"], "start_seconds": ["70", "20"], "properties": ["music, radio, gunshots", "a, laugh, infant"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a baby cries and a woman speaks"], "question": "which entity is about a woman?", "label": 0}, {"captions": ["a man talks as something metal hits against and glass is set 
down", "birds chirp and an owl hoots before a man speaks briefly"], "sample_ids": ["x6ijhqRY38s", "wRBHTgrbiwg"], "start_seconds": ["250", "50"], "properties": ["something metal, glass, hit", "bird, owl, speak"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "birds are chirping and insects are buzzing"], "question": "which entity has more animals", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a stream of water flows as people talk and wind blows"], "sample_ids": ["ylpYOorfH4o", "xBxDz0CFVn0"], "start_seconds": ["410", "30"], "properties": ["engine, run, loud", "stream, water, flow"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "people applaud and hoot and chat quietly"], "sample_ids": ["zl9Dqx-j7q4", "wwyfGO2J4"], "start_seconds": ["6", "90"], "properties": ["motors rev, laugh, loudly", "people, applaud, hoot"], "captions_pred_video": ["footage of a man driving a car in the dark", 
null], "captions_pred_audio": ["a jet engine roars ", "people are clapping and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "wind blows as people chatter quietly"], "sample_ids": ["vMf1dLD6Sng", "xBxDz0CFVn0"], "start_seconds": ["6", "30"], "properties": ["frog, bird, vocalize", "wind, chatter, people"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "footage is blurry and out of focus"], "captions_pred_audio": ["a frog croaks loudly", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "winds blows roughly as a vehicle races past"], "sample_ids": ["wudZTNBtVqc", "xjvTpk2Zpr8"], "start_seconds": ["60", "70"], "properties": ["accelerates, engine, wind", "wind, blows, vehicle"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a rumble grows louder", "several insects fly while two men talk"], "sample_ids": ["y4MY9mp8-TA", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["loudness, increase, rumble", "several, fly, men"], "captions_pred_video": ["a helicopter flying in the sky", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a helicopter flies overhead ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more likely to be in a forest", "label": 1}, {"captions": ["someone is snoring while sleeping", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["ujMt0-D-x2k", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["snore, sleep, someone", 
"two men, woman, birds"], "captions_pred_video": ["of the dog playing with a toy on the floor", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a baby laugh at a sputter", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["sLUnaPT5gM8", "wyllXV6PjKo"], "start_seconds": ["0", "30"], "properties": ["laugh, sputter, baby", "a baby, a woman, a man"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a woman speaks and a baby cries"], "question": "which baby is crying", "label": 1}, {"captions": ["a goat bleats as a person speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["tPJvjq9QePY", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["bleats, person, speak", "people, applaud, hoot"], "captions_pred_video": ["a dog and a sheep in a barn", null], "captions_pred_audio": ["a baby cries and a man speaks", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["long loud burping by a man", "some men converse over an engine running"], "sample_ids": ["xmiUIOhtZyQ", "sCiy7QS1U"], "start_seconds": ["60", "300"], "properties": ["loud, burp, man", "men, converse, engine"], "captions_pred_video": ["homer simpson drinking a beer", null], "captions_pred_audio": ["a person burps and music plays in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a man?", "label": 0}, {"captions": ["an engine runs and a man speaks", "a child speaks in closed space"], "sample_ids": ["yT5WfYMRr-U", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["engine, run, man", "child, space, speak"], "captions_pred_video": 
["a man in a red jacket and sunglasses driving a boat", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["sZPuqDgX2V0", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["commentator, race, track", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a video of a race", "label": 0}, {"captions": ["a woman speaks followed by clicks and scraping", "a woman speaks as she rubs two objects together"], "sample_ids": ["yYJksgsxx5U", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["audio, clicks, scraping", "two objects, woman, speak"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how 
to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "paper is crumpling consistently"], "sample_ids": ["yaln9y8I7ms", "v5cSxLaHADY"], "start_seconds": ["230", "0"], "properties": ["female, flushes, toilet", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage is blurry and out of focus", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a toilet flushes and a man speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a person is burping then speaks and laughs"], "sample_ids": ["w2M4i1mklOA", "wAAkbZToh8"], "start_seconds": ["30", "0"], "properties": ["loud, chime, bell", "burp, laugh, speak"], "captions_pred_video": ["footage of an antique clock", null], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man burps and a woman speaks"], "question": "which entity is speaking", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["vs65y4qmyBE", "wwyfGO2J4"], "start_seconds": ["340", "90"], "properties": ["wind, blows, strongly", "people, applaud, hoot"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be a natural phenomenon", "label": 0}, {"captions": ["a door slams shut roughly", "water splashes and a door squeaks"], "sample_ids": 
["zkKdxzNC97Y", "sdXV-ylviw"], "start_seconds": ["27", "190"], "properties": ["a door, slams, shut", "sound, splash, door"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a dog barks and taps with background noise "], "question": "which door is squeaking", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a woman sneezes then speaks"], "sample_ids": ["tDVADusiIoc", "x4dZyf9Gbj0"], "start_seconds": ["60", "130"], "properties": ["water, radio, man", "sneezes, speaks, woman"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman sneezes and speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["uJV8NDaHqqk", "ziUT9IFTkjg"], "start_seconds": ["100", "10"], "properties": ["loud, fly, chirp", "background, birds, rustling"], "captions_pred_video": ["a bee hive in a wooden box", null], "captions_pred_audio": ["a swarm of bees buzzing around", "birds are chirping and a chime is ringing "], "question": "which entity is quieter", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zF8yoL0rkbI", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["engine, run, someone", "two men, woman, birds"], "captions_pred_video": ["footage of the traffic on the street at night", null], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": 
["vehicle tires screech and a man speaks before a car door opens", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sxYkFKFIZD0", "uZesmtKZGSw"], "start_seconds": ["20", "250"], "properties": ["screech, man, door", "men, talk, cars"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a car speeding up in the distance", "vehicles pass by on a roadway"], "sample_ids": ["u0TrcHhkPQ", "tgbONvsP47Y"], "start_seconds": ["20", "0"], "properties": ["distance, car, speed", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a car is driving on the road "], "question": "which vehicle is moving faster", "label": 0}, {"captions": ["children speak as a female ask them questions", "a clock ticktocks"], 
"sample_ids": ["wEBlkGWVWwE", "v-g-j2uTByM"], "start_seconds": ["260", "30"], "properties": ["female, speak, questions", "ticktocks, clock, ticktocks"], "captions_pred_video": ["shows a person writing on the whiteboard", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a drill drills through something then people begin laughing"], "sample_ids": ["t25U-v4k4ts", "tEE3MpBt1sg"], "start_seconds": ["40", "50"], "properties": ["a, chirps, bird", "drill, something, laugh"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "people are laughing breathing and speaking with background noise "], "question": "which entity is about a drill?", "label": 1}, {"captions": ["someone whistles a tune", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sIXTftIuUgw", "wqZ135Ssz0"], "start_seconds": ["90", "60"], "properties": ["someone, tune, whistle", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person whistling a song", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "birds tweet and squawk"], "sample_ids": ["sWZzXuWYY", "w1mlz3Pe4fU"], "start_seconds": ["420", "300"], "properties": ["male, clanks, thumps", "squawk, tweet, scream"], "captions_pred_video": [null, "of a bird in a cage"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "birds are chirping and singing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a child speaks in 
closed space", "people speak softly as food sizzles"], "sample_ids": ["yW6FWLSLkx4", "yhQ2Lg-7qDY"], "start_seconds": ["40", "130"], "properties": ["child, space, speak", "food, sizzle, speak"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a faucet is running and a man is speaking"], "question": "which entity is more likely to be in a closed space", "label": 0}, {"captions": ["a person sniffs and sneezes", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["uRlbY6aoBU", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["sneezes, person, sniffs", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is sneezing ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a person", "label": 0}, {"captions": ["a woman speaks and food sizzles while frying", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["wTideSjRFS0", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["food, sizzle, woman", "People, motor, brakes"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a woman speaking and food sizzling while frying?", "label": 0}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "people cheer as a vehicle engine revs"], "sample_ids": ["uC9dtII1KDI", "xjhAnI2q6hM"], "start_seconds": ["150", "6"], "properties": ["wind, gusts, distance", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a person 
riding a horse in a riding arena", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["a stream of water flows quickly", "people speak as gunfire rings out"], "sample_ids": ["wbHTKEJZyhc", "wqTCwqVRDlk"], "start_seconds": ["20", "80"], "properties": ["stream, water, flow", "gunfire, ring, speak"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["a bird chirps in response to a 
woman chirping for the birds", "wind blowing followed by a zoom"], "sample_ids": ["uOpoD0gGXcs", "vr8ZXjEBhMQ"], "start_seconds": ["120", "150"], "properties": ["chirps, woman, bird", "wind, blow, zoom"], "captions_pred_video": ["a herd of cows grazing in the field", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["birds are chirping and a man is speaking", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to blow", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a man speaks as a car is passing by"], "sample_ids": ["vXlk0lIQBFo", "sK4u5T8hW78"], "start_seconds": ["470", "30"], "properties": ["wind, talk, vocalize", "a, car, pass"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "children speak and play together"], "sample_ids": ["xjvTpk2Zpr8", "yVVP8XvWJTo"], "start_seconds": ["70", "260"], "properties": ["wind, blows, vehicle", "children, speak, play"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of a playground at a school or daycare center"], "captions_pred_audio": ["a jet engine roars and wind blows ", "children are speaking and breathing with background noise "], "question": "which entity is more likely to be 
in a vehicle", "label": 0}, {"captions": ["an emergency siren wails as it passes", "wind blows as people chatter quietly"], "sample_ids": ["vGj1XLJvNrw", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["wails, wails, pass", "wind, chatter, people"], "captions_pred_video": ["footage of a police car driving down a city street", "footage is blurry and out of focus"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["v0x1odnXtP0", "yajyRTUQk3U"], "start_seconds": ["210", "400"], "properties": ["keyboard, type, computer", "a woman, something, fried"], "captions_pred_video": ["how to make money on youtube in spanish", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman is speaking while food is frying in the background"], "question": "which entity is a demonstration of cooking?", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["u--KhUW8l1Y", "tDVADusiIoc"], "start_seconds": ["0", "60"], "properties": ["engine, sound, horn", "water, radio, man"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "a man speaks as a machine runs"], "sample_ids": ["uoGVs9yUqY4", "vD6lYD1l0BY"], "start_seconds": ["30", "330"], "properties": ["multiple, vocalize, wind", "a, machine, run"], 
"captions_pred_video": ["for how to make a wooden shed door youtube", "game controller being held in the hands of the person"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a man is speaking and dishes are being washed "], "question": "which entity is a man speaking to a machine?", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vcmWSmvti8", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["music, man, fire", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a consistent ticking pattern"], "sample_ids": ["vimzuGQvdcU", "sCeWURVHfOM"], "start_seconds": ["30", "30"], "properties": ["a, man, yells", "ticking, pattern, clock"], "captions_pred_video": ["a group of people are rafting down a river", "- a close-up view of the clock's inner workings"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "ticking of a clock"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["zliInBdC98Y", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["a, baby, cries, wails", "water, radio, man"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a 
man speaking over a radio?", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["y2ZBGpgbhHM", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["animal, growl, bird", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds chirping and a dog panting", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a baby cries and a woman moans", "dishes cling together then a man begins to speak"], "sample_ids": ["smDKStoHBJo", "sQGXqGcwOTc"], "start_seconds": ["0", "3"], "properties": ["a, cry, woman", "cling, speak, dishes"], "captions_pred_video": ["a man holding a crying baby in his arms", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "mechanisms are operating and water is splashing "], "question": "which entity is about a baby crying and a woman moaning?", "label": 0}, {"captions": ["an animal quacks rapidly", "a man speaks as a vehicles passes by then a woman speaks"], "sample_ids": ["vh30P49Po6s", "siJFXfGWgDk"], "start_seconds": ["30", "50"], "properties": ["animal, quacks, rapidly", "man, woman, vehicle"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking and birds are chirping in the background "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["the revving of an engine throttle followed by a man speaking", "several insects fly while two men talk"], "sample_ids": ["tezvROoo4bs", "s-T9OVOiMLo"], "start_seconds": ["40", "330"], "properties": ["audio, throttle, speaking", "several, fly, men"], "captions_pred_video": ["footage of a busy city 
street with cars parked on both sides of the road", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "a duck quacks several times"], "sample_ids": ["yeFvk9x0wWI", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["clack, bird, chirp", "quacks, duck, several"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 0}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["uzQnlJXBbOM", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["ringing, beep, stop", "female, spraying, scream"], "captions_pred_video": ["footage of a person using a cell phone on a table", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a telephone rings and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a clock ticks quietly and rhythmically", "winds blows roughly as a vehicle races past"], "sample_ids": ["u7C-AEBQM", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["ticks, rhythmic, quiet", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a ticktock of a clock", "a jet engine roars and wind blows "], "question": "which entity is louder", "label": 1}, {"captions": ["music plays and repeated slaps accompany 
human sniveling, then insect buzz", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["weDbePuc-Xc", "xfaoyyzw2WU"], "start_seconds": ["40", "180"], "properties": ["music, slaps, human", "loud, jet engine, roar"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "a horn rings out as a machine runs by"], "sample_ids": ["u5RmF3c3Aw", "slZLHwNbbt4"], "start_seconds": ["60", "300"], "properties": ["engine, car, zoom", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vzceMbklWc", "tDVADusiIoc"], "start_seconds": ["180", "60"], "properties": ["water, faucet, sink", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["water is running and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "water flows as men speak and yell"], "sample_ids": ["tGcFnX0GHI", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["ring, talk, woman", "water, flow, men"], "captions_pred_video": 
[null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "vehicles pass by on a roadway"], "sample_ids": ["tPJvjq9QePY", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["animal, bleat, moo", "pass, vehicle, roadway"], "captions_pred_video": ["a dog and a sheep in a barn", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a baby cries and a man speaks", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a man speaks, another man speaks, and a small bell dings"], "sample_ids": ["uEU-Hg5MTN8", "t69a8aRKhmc"], "start_seconds": ["27", "30"], "properties": ["animal, grunts, snorts", "a, b, c"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and birds are chirping in the background "], "question": "which entity has a ding?", "label": 1}, {"captions": ["a person speaks briefly", "vehicles pass by on a roadway"], "sample_ids": ["zOZleIRqZm4", "tgbONvsP47Y"], "start_seconds": ["80", "0"], "properties": ["person, talk, brief", "pass, vehicle, roadway"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a car is driving on the road "], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a woman speaks happily and an animal chirps"], "sample_ids": ["xC8kbrKJmco", 
"uWAAAL4CIoc"], "start_seconds": ["0", "0"], "properties": ["background, goat, scream", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a goat is bleating ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "three men talk while wind blows and some liquid flows"], "sample_ids": ["slZLHwNbbt4", "vJ7JPEFhyLA"], "start_seconds": ["300", "16"], "properties": ["a, horn, run", "three men, wind, flow"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a machine running by?", "label": 0}, {"captions": ["food fries in a pan as someone talks and cooks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["ukxt9I7eMMg", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["food, pan, cook", "two men, woman, birds"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "a telephone rings followed by a woman talking"], "sample_ids": ["rqu8iB22IY", "tGcFnX0GHI"], "start_seconds": ["5", "0"], "properties": ["sound, repeats, laugh", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": 
["a vehicle accelerates and squeals tires", "a person is burping while a girl speaks"], "sample_ids": ["yRx9txMcBl0", "vdoxuJn9lTc"], "start_seconds": ["40", "40"], "properties": ["accelerates, tires, squeals", "person, burp, girl"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a child speaks followed by a burp"], "question": "which entity is a person", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "roadway noise occurs and a truck accelerates"], "sample_ids": ["vJvryTwuAV8", "tgbONvsP47Y"], "start_seconds": ["16", "0"], "properties": ["audience, cheer, man", "noise, truck, accelerate"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a car is driving on the road "], "question": "which is not a person", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["xO-Q2BlIIPU", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["two men, exclamation, speak", "a woman, laughs, animal"], "captions_pred_video": ["a clock with a green glowing display 
showing the time 09 07 2016 12 31 2016", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking and laughing?", "label": 1}, {"captions": ["a helicopter engine idles continuously", "a man speaks while water trickles and flows"], "sample_ids": ["ugHJF0hfYkg", "sapQIQUhFc"], "start_seconds": ["10", "280"], "properties": ["engine, idle, continuously", "water, trickles, flow"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking and a stream is flowing in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "roadway noise occurs and a truck accelerates"], "sample_ids": ["ukg5L09Wpvo", "tgbONvsP47Y"], "start_seconds": ["150", "0"], "properties": ["a train, a horn, a bell", "noise, truck, accelerate"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a car is driving on the road "], "question": "which is not a vehicle", "label": 0}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a man speaks followed by another man speaking outside"], "sample_ids": ["yZmhM1HcsyE", "viuTg1M-dqg"], "start_seconds": ["4", "30"], "properties": ["engine, roar, water", "two men, speak, follow"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which 
entity is a video of two men speaking?", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "a toilet flushes and a female speaks"], "sample_ids": ["sQwlkXjQabo", "yaln9y8I7ms"], "start_seconds": ["10", "230"], "properties": ["liquid, surface, spray", "female, flushes, toilet"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage is blurry and out of focus"], "captions_pred_audio": ["spraying followed by silence", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wvKpEYswXO0", "zj2R0XoFr5k"], "start_seconds": ["150", "50"], "properties": ["sound, water, running", "airplane, boy, fly"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["vehicles pass by on a roadway", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tgbONvsP47Y", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["pass, vehicle, roadway", "a woman, something, fried"], "captions_pred_video": ["footage of a fire truck entering a garage", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a car is driving on the road ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["people speak softly as food sizzles", "a car accelerates and wind blows"], "sample_ids": ["yhQ2Lg-7qDY", "u0TrcHhkPQ"], "start_seconds": 
["130", "20"], "properties": ["food, sizzle, speak", "accelerates, wind, blows"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["birds chirp as a train approaches", "paper is crumpling consistently"], "sample_ids": ["xM4joTqDVp4", "v5cSxLaHADY"], "start_seconds": ["160", "0"], "properties": ["bird, chirp, train", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["birds are chirping and a train is moving ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["material crumbles into a microphone", "a infant makes noise and is excited"], "sample_ids": ["vofpvUo6NAw", "wIJK3-5y0kA"], "start_seconds": ["220", "30"], "properties": ["material, crumbles, microphone", "noise, excited, infant"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "someone is typing on a computer keyboard"], "sample_ids": ["uWPRNLnpy7Y", "v0x1odnXtP0"], "start_seconds": ["10", "210"], "properties": ["accelerate, laugh, vehicle", "keyboard, type, computer"], "captions_pred_video": ["is taken from a car driving down the street", "how to make money on youtube in spanish"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a person is typing on a keyboard"], "question": "which is a person", "label": 1}, 
{"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "an insect buzzes around continuously"], "sample_ids": ["s4Uz1Ffgo04", "v25l1jef3JY"], "start_seconds": ["100", "0"], "properties": ["water, rushes, motorcycle", "buzzes, continuously, insect"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a fly is buzzing around a microphone "], "question": "which entity is moving faster", "label": 0}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "an insect buzzes around continuously"], "sample_ids": ["tDVADusiIoc", "v25l1jef3JY"], "start_seconds": ["60", "0"], "properties": ["water, radio, man", "buzzes, continuously, insect"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks while water drains", "a train horn blows as it passes by"], "sample_ids": ["vSeGhaZt-aI", "zVacuqSb4LI"], "start_seconds": ["50", "30"], "properties": ["water, drain, man", "horn, blows, train"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong 
a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is moving", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "a child speaks in closed space"], "sample_ids": ["sZPuqDgX2V0", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["engine, accelerate, intercom", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a horn blasts loudly as a train passes"], "sample_ids": ["wEBlkGWVWwE", "zsLxS-uLJTw"], "start_seconds": ["260", "20"], "properties": ["a, babble, woman", "horn, blast, train"], "captions_pred_video": ["shows a person writing on the whiteboard", "footage of the train on the tracks at sunrise or sunset"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a train blows its horn and moves on the tracks "], "question": "which entity is louder", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "some men converse over an engine running"], "sample_ids": ["xKB8O8LTs6s", "sCiy7QS1U"], "start_seconds": ["70", "300"], "properties": ["music, gunfire, explosion", "men, converse, engine"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", 
null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more calm", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "a horn rings out as a machine runs by"], "sample_ids": ["zF8yoL0rkbI", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["engine, run, someone", "a, horn, run"], "captions_pred_video": ["footage of the traffic on the street at night", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a man speaks as a car is passing by"], "sample_ids": ["yajyRTUQk3U", "sK4u5T8hW78"], "start_seconds": ["400", "30"], "properties": ["noise, woman, speak", "a, car, pass"], "captions_pred_video": ["- a woman cooking in the kitchen", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["goats bleat and people speak", "three men talk while wind blows and some liquid flows"], "sample_ids": ["z5iUE5h0EPs", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["goats bleat, people speak, language", "three men, wind, flow"], "captions_pred_video": ["of the goat in the barn", 
"a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a goat bleats and a man speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "wind blows as people chatter quietly"], "sample_ids": ["sEprKHm8Sj8", "xBxDz0CFVn0"], "start_seconds": ["90", "30"], "properties": ["car, tires, slows", "wind, chatter, people"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage is blurry and out of focus"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["zF8yoL0rkbI", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["engine, run, someone", "engine, idle, woman"], "captions_pred_video": ["footage of the traffic on the street at night", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a woman is speaking and a subway train is moving "], "question": "which entity has a running engine", "label": 0}, {"captions": ["small dogs growl, bark and yip.", "a woman speaks as she rubs two objects together"], "sample_ids": ["sShpyu2l4YQ", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["growl, bark, yip", "two objects, woman, speak"], "captions_pred_video": ["the puppies are playing with a toy", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["zliInBdC98Y", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["a, baby, cries, wails", "engine, laugh, loud"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a baby cries and a woman speaks", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["two frogs croak at each other", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["zg0X6BnhOLQ", "xfaoyyzw2WU"], "start_seconds": ["410", "180"], "properties": ["two frogs, croak, at each other", "loud, jet engine, roar"], "captions_pred_video": ["footage of lightning in the sky at night", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a frog is croaking", "an aircraft engine roars and a man speaks "], "question": "which 
is louder", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a man speaks as a car is passing by"], "sample_ids": ["y8dSeubCNI", "sK4u5T8hW78"], "start_seconds": ["4", "30"], "properties": ["men, women, car", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["an engine revving and people talking in the background", "a man is speaking with background noise and breathing sounds "], "question": "which car is revving and accelerating loudly", "label": 0}, {"captions": ["birds chirp and a dog breathes heavily", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["y2ZBGpgbhHM", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["dog, chirp, breathe", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more birds", "label": 1}, {"captions": ["a child yells and another yells", "a train horn blows as it passes by"], "sample_ids": ["vMDHu7Lxcgw", "zVacuqSb4LI"], "start_seconds": ["410", "30"], "properties": ["two, yell, child", "horn, blows, train"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["w6RTHR6AeAg", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["call, owl, screech", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a helicopter engine idles continuously", "a train horn blows as it passes by"], "sample_ids": ["ugHJF0hfYkg", "zVacuqSb4LI"], "start_seconds": ["10", "30"], "properties": ["engine, idle, continuously", "horn, blows, train"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video 
by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a helicopter is flying overhead ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a whistling owl calls out repeatedly and insects screech"], "sample_ids": ["zY3icUyMdh8", "w6RTHR6AeAg"], "start_seconds": ["20", "40"], "properties": ["dog, bark, engine", "call, owl, screech"], "captions_pred_video": ["footage of a bus driving through a residential street at night", null], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "an owl hoots and mechanisms operate "], "question": "which entity is a bird", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "a chime of a clock followed by various tones of ticking with come clinking"], "sample_ids": ["yNtRmrn0io8", "uqFtmnhuqA8"], "start_seconds": ["210", "30"], "properties": ["storm, distance, strike", "a, b, c"], "captions_pred_video": ["footage of a house in the middle of the night", "shows a clock on the wall of a room with a person standing in front of it"], "captions_pred_audio": ["rain falls and thunder roars", "mechanisms are ticking and a hammer is striking "], "question": "which entity is a clock?", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["t25U-v4k4ts", "wDVMhEdTiVw"], "start_seconds": ["40", "30"], "properties": ["a, chirps, bird", "gun, shoot, water"], "captions_pred_video": ["of a beekeeper working on a 
beehive in the woods", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause harm", "label": 1}, {"captions": ["multiple insects buzz over rustling wind", "small dogs yip and bark sharply"], "sample_ids": ["tMJne1a4AFI", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["wind, buzz, rustling", "bark, yip, sharply"], "captions_pred_video": ["a swarm of bees on the ground", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a swarm of bees buzzing around", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["heavy rain splashes as it falls", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["wP8ZKrlx3oA", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["fall, rain, splash", "animal, grunts, snorts"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman is speaking and a baby is crying"], "question": "which entity is not a splash", "label": 1}, {"captions": ["water flows as men speak and yell", "a child speaks in closed space"], "sample_ids": ["vJ7JPEFhyLA", "yW6FWLSLkx4"], "start_seconds": ["16", "40"], "properties": ["water, flow, men", "child, space, speak"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": 
["continuous sneezing together with speech", "waves crash against a shoreline and people speak"], "sample_ids": ["x4dZyf9Gbj0", "yFB25fqfU8I"], "start_seconds": ["130", "300"], "properties": ["continuous, sneeze, speech", "wave, crash, shoreline"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "an engine runs loudly"], "sample_ids": ["ul60S8TXDA8", "vqZuVbG6-HI"], "start_seconds": ["60", "130"], "properties": ["sound, distance, bell", "loud, engine, run"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["s59PfAghdkM", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["bird, chirp, background, horse, neigh", "female, spraying, scream"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": 
["wP8ZKrlx3oA", "tDVADusiIoc"], "start_seconds": ["40", "60"], "properties": ["rain, storm, thunder", "water, radio, man"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "some tunes played by whistling"], "sample_ids": ["uqFtmnhuqA8", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["a, b, c", "tune, play, whistling"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a person whistling a song"], "question": "which entity is a musical composition", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vddP56-ogds", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["liquid, laughs, man", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a church bell rings several times", "a train horn blows as it passes by"], "sample_ids": ["sUVVjE3Ucp8", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["ring, bell, several", "horn, blows, train"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a church bell is ringing ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["people clap and speak in the distance", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wwyfGO2J4", "xfaoyyzw2WU"], "start_seconds": ["90", "180"], "properties": ["clap, distance, speak", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a person snoring", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["t8tv5YRMJUg", "w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["a person, snore, loud", "wind, blow, vehicle"], "captions_pred_video": ["of a man getting his face licked by another man", null], "captions_pred_audio": ["a person sniffs and breathes heavily", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is not a person?", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a stream of water runs briefly"], "sample_ids": 
["uzQnlJXBbOM", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["ringing, beep, stop", "stream, water, run"], "captions_pred_video": ["footage of a person using a cell phone on a table", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a telephone rings and a man speaks", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a motor runs in the distance as a soft wind periodically gusts"], "sample_ids": ["sfAvvZwdLCY", "xyL9F5VrjkE"], "start_seconds": ["20", "20"], "properties": ["water drains, flushes, water", "wind, motor, distance"], "captions_pred_video": ["footage of the toilet in the bathroom", "of a caterpillar truck loading logs into a trailer"], "captions_pred_audio": ["a toilet is flushed", "the wind is blowing and a car is passing by "], "question": "which entity is a source of water", "label": 0}, {"captions": ["waves crash against a shoreline and wind blows", "a woman speaks as she rubs two objects together"], "sample_ids": ["zdYdyF9-m8U", "vzxHnu-SFEw"], "start_seconds": ["7", "80"], "properties": ["wind, crash, shoreline", "two objects, woman, speak"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make 
a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["waves crash and wind blows ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a physical action", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vzxHnu-SFEw", "zFjIWfSD-4"], "start_seconds": ["80", "410"], "properties": ["two objects, woman, speak", "People, motor, brakes"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a clock ticktocks"], "sample_ids": ["uEU-Hg5MTN8", "v-g-j2uTByM"], "start_seconds": ["27", "30"], 
"properties": ["animal, grunts, snorts", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a stream of water flows quickly", "people applaud and hoot and chat quietly"], "sample_ids": ["wbHTKEJZyhc", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["stream, water, flow", "people, applaud, hoot"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", null], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an airplane engine spools and people speak", "a 
clock ticktocks"], "sample_ids": ["wTjoRj1se3U", "v-g-j2uTByM"], "start_seconds": ["390", "30"], "properties": ["airplane, engine, spool", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a jet engine is running and people are talking", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["water splashes as an animal walks through", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["w1ir-sZ3Im8", "uYT5gxnyMWM"], "start_seconds": ["90", "50"], "properties": ["animal, water, splashes", "female, spraying, scream"], "captions_pred_video": ["footage of a group of people riding horses through a river", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["water splashes and gurgles as people speak", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["food is frying and sizzles", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zNRChLjqcU", "vfYTJq7nU"], "start_seconds": ["220", "130"], "properties": ["food is frying, sizzles, food", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running from a faucet into a sink", "a duck quacks and a woman speaks"], "question": "which entity is about a duck?", "label": 1}, {"captions": ["material crumbles into a microphone", "a man speaks followed by another man speaking outside"], "sample_ids": ["vofpvUo6NAw", "viuTg1M-dqg"], "start_seconds": ["220", "30"], "properties": ["material, crumbles, microphone", "two men, speak, follow"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "footage of water coming out of a hole in the ground"], 
"captions_pred_audio": ["paper is being crumpled and crinkled", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a toilet flushes and a female speaks"], "sample_ids": ["zcDwZ6W7E3E", "yaln9y8I7ms"], "start_seconds": ["180", "230"], "properties": ["man, speak, motorcycles", "female, flushes, toilet"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "an infant crying frantically"], "sample_ids": ["xERFUeZONz8", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["ring, approach, traffic", "cry, infant, frantically"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "of the baby crying in the car seat"], "captions_pred_audio": ["an emergency vehicle siren blares", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "a stream of water flows as people talk and wind blows"], "sample_ids": ["x9JovgqUcs", "xBxDz0CFVn0"], "start_seconds": ["500", "30"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "paper is crumpling consistently"], "sample_ids": ["vs65y4qmyBE", 
"v5cSxLaHADY"], "start_seconds": ["340", "0"], "properties": ["engine, run, man", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a machine beeps continuously"], "sample_ids": ["wtDqrBygTcU", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["man, engine, run", "beeps, machine, continuously"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", null], "captions_pred_audio": ["a man is speaking and a motor is running", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["water pouring and bubbling", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["uyRfq-jKPpo", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["water, bubbles, pouring", "female, spraying, scream"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "footage of a person spraying paint on the 
ceiling"], "captions_pred_audio": ["water is running from a faucet", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking and spraying?", "label": 1}, {"captions": ["birds chirp and objects are moved around", "multiple people speak and children yell while water gurgles"], "sample_ids": ["yPUYU6t3rwo", "vb1fPSDI4c"], "start_seconds": ["370", "30"], "properties": ["birds chirp, objects are moved around, birds", "multiple, people, yell"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", null], "captions_pred_audio": ["insects buzz and a man speaks", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["continuous chugging with birds chirping in the background", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["xM4joTqDVp4", "vbZ-0lGPneg"], "start_seconds": ["160", "30"], "properties": ["background, chirp, birds", "a woman, a television program, a bird"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a woman is speaking and a dog is whimpering"], "question": "which entity has birds chirping in the background?", "label": 0}, {"captions": ["a person is snoring while sleeping", "vehicles pass by on a roadway"], "sample_ids": ["vJrjSeP17yE", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["a person is sleeping, snoring, person", "pass, vehicle, roadway"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a person snoring loudly", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["dog barking and vehicle 
engine idling followed shortly by vehicle engine revving", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["zY3icUyMdh8", "vlS6YMeWAPo"], "start_seconds": ["20", "40"], "properties": ["dog, bark, engine", "sheep, baa, birds"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp as a bell rings", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["ziUT9IFTkjg", "vbZ-0lGPneg"], "start_seconds": ["10", "30"], "properties": ["chirp, bell, ring", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "a car accelerates and wind blows"], "sample_ids": ["wTideSjRFS0", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["food, sizzle, woman", "accelerates, wind, blows"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a vehicle engine accelerating then running on idle"], "sample_ids": ["zk-xJGQU8-4", "vYkA3cfXp5Q"], "start_seconds": ["130", "30"], "properties": ["food, man, woman", "engine, accelerate, idle"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "footage of a car driving 
down the street on a sunny day"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a machine beeps continuously"], "sample_ids": ["vBHyYJ8pL0", "y682ml90jGw"], "start_seconds": ["2", "11"], "properties": ["noise, door, opening", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a beeping sound is being made "], "question": "which entity is accompanied by a door opening and closing", "label": 0}, {"captions": ["a man speaks over a running engine and blowing wind", "pigeons vocalize and birds chirp"], "sample_ids": ["ylpYOorfH4o", "uiS58TNyUiw"], "start_seconds": ["410", "430"], "properties": ["engine, running, wind", "vocalize, bird, chirp"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "several insects fly while two men talk"], "sample_ids": ["w-4gHptFNuU", 
"s-T9OVOiMLo"], "start_seconds": ["21", "330"], "properties": ["engine revs, accelerates, bump", "several, fly, men"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a moving object", "label": 0}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "a clock ticktocks briefly"], "sample_ids": ["wP8ZKrlx3oA", "u7C-AEBQM"], "start_seconds": ["40", "30"], "properties": ["rain, storm, thunder", "ticktocks, clock, ticktocks briefly"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", null], "captions_pred_audio": ["a heavy rain is falling on a surface", "a ticktock of a clock"], "question": "which entity is a clock?", "label": 1}, {"captions": ["a low rumbling in the distance followed by a motorcycle engine revving up", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vr8ZXjEBhMQ", "uZesmtKZGSw"], "start_seconds": ["150", "250"], "properties": ["sound, distance, engine", "men, talk, cars"], "captions_pred_video": ["is taken from a motorcycle's point of view", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", 
"a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about cars?", "label": 1}, {"captions": ["scraping and female speech with distant music", "birds chirp and objects are moved around"], "sample_ids": ["yHeVV-xeOxQ", "yPUYU6t3rwo"], "start_seconds": ["130", "370"], "properties": ["female, speech, music", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a girl milking a goat's udder", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yFB25fqfU8I", "sSMl2vc3ek"], "start_seconds": ["300", "20"], "properties": ["wave, crash, shoreline", "loud, multiple, distance"], "captions_pred_video": ["footage of a person surfing in the ocean", null], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "waves crash against a shoreline and people speak"], "sample_ids": ["vJvryTwuAV8", "yFB25fqfU8I"], "start_seconds": ["16", "300"], "properties": ["audience, cheer, man", "wave, crash, shoreline"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["a door slams shut roughly", "multiple ducks quack continuously"], "sample_ids": 
["zkKdxzNC97Y", "wfHeoPDLMaM"], "start_seconds": ["27", "30"], "properties": ["a door, slams, shut", "multiple, quack, continuously"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire"], "captions_pred_audio": ["a door is opened and closed", "ducks are quacking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaking with light rustling", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zOZleIRqZm4", "wqZ135Ssz0"], "start_seconds": ["80", "60"], "properties": ["light, rustling, man", "two men, woman, birds"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["water flows followed by women screaming", "a man speaks then blows a vehicle horn as wind blows"], "sample_ids": ["w5W5Kqtc8E", "zALy31PjDl0"], "start_seconds": ["100", "21"], "properties": ["water, flow, women", "a man, a 
vehicle, a horn"], "captions_pred_video": [null, "a motorcycle is parked on the side of a brick walkway"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking and a car horn is honking"], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["multiple ducks quack continuously", "some men converse over an engine running"], "sample_ids": ["wfHeoPDLMaM", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["multiple, quack, continuously", "men, converse, engine"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a train horn sounds as it passes by", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["ukg5L09Wpvo", "xfaoyyzw2WU"], "start_seconds": ["150", "180"], "properties": ["sound, train, horn", "loud, jet engine, roar"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a 
train blows its whistle and blows its horn ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vBHyYJ8pL0", "vb1fPSDI4c"], "start_seconds": ["2", "30"], "properties": ["noise, door, opening", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["several ducks are quacking and squawking", "some men converse over an engine running"], "sample_ids": ["wfHeoPDLMaM", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["quacking, squawking, ducks", "men, converse, engine"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a group of people", "label": 1}, {"captions": ["gunshots ring out, a man 
yells, and more shots follow", "water pouring and bubbling"], "sample_ids": ["vKrYfzleLB8", "uyRfq-jKPpo"], "start_seconds": ["110", "50"], "properties": ["a, ring, gunshots", "water, bubbles, pouring"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks while water drains", "an airplane engine runs"], "sample_ids": ["vSeGhaZt-aI", "yVPZ2MNWpms"], "start_seconds": ["50", "0"], "properties": ["water, drain, man", "engine, airplane, runs"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays"], "sample_ids": ["viuTg1M-dqg", "sU53zg9Jp7s"], "start_seconds": ["30", "380"], "properties": 
["two men, speak, follow", "a bird chirps, a door bell ringing, a woman gasps"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "a cartoon girl is standing in front of a blue couch"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "birds chirp and a doorbell rings with breathing and music in the background "], "question": "which entity has a doorbell ringing?", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["su6FAOcOA8c", "uZesmtKZGSw"], "start_seconds": ["4", "250"], "properties": ["engine, run, woman", "men, talk, cars"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has a woman making an announcement?", "label": 0}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a person uses a saw to cut some wood"], "sample_ids": ["ziUT9IFTkjg", "sHbXC6na9hg"], "start_seconds": ["10", "0"], "properties": ["background, birds, rustling", "a person, saw, wood"], "captions_pred_video": [null, "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["birds 
are chirping and a chime is ringing ", "an engine is idling and vibrating"], "question": "which entity is more active", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a car speeding up in the distance"], "sample_ids": ["tOSWIURC-4", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["engine, work, nearby", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a lawn mower is running ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["birds chirp as a bell rings", "some tunes played by whistling"], "sample_ids": ["ziUT9IFTkjg", "u6BnG6YZqJ4"], "start_seconds": ["10", "0"], "properties": ["chirp, bell, ring", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vBHyYJ8pL0", "zl9Dqx-j7q4"], "start_seconds": ["2", "6"], "properties": ["noise, door, opening", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a child speaks in closed space"], "sample_ids": ["sK4u5T8hW78", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["a, car, pass", "child, space, speak"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a weapon fires multiple times", "water splashes as an animal walks through"], "sample_ids": ["sMC07Ucy7kg", "w1ir-sZ3Im8"], "start_seconds": ["10", "90"], "properties": ["weapon, fire, multiple", "animal, water, splashes"], "captions_pred_video": ["footage is from a car's point of view", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "water splashes and gurgles as people speak"], "question": "which entity is not a weapon", "label": 1}, {"captions": ["a cat meows and children speak", "a power tool runs and touches a surface"], "sample_ids": ["x5cuQjOdM3E", "zfvPRf3chY"], "start_seconds": ["30", "290"], "properties": ["cat, speak, children", "power tool, run, touch"], "captions_pred_video": ["a black background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking while a power tool is being used "], "question": "which entity is a machine", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["rqu8iB22IY", "zFjIWfSD-4"], "start_seconds": ["5", "410"], "properties": ["sound, repeats, laugh", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a man is speaking while a car is driving and a ticking 
sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["a car speeding up in the distance", "water flows followed by women screaming"], "sample_ids": ["u0TrcHhkPQ", "w5W5Kqtc8E"], "start_seconds": ["20", "100"], "properties": ["distance, car, speed", "water, flow, women"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is moving faster", "label": 0}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "a horn rings out as a machine runs by"], "sample_ids": ["wqZ135Ssz0", "slZLHwNbbt4"], "start_seconds": ["60", "300"], "properties": ["two men, woman, birds", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity has a horn", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "a car speeding up in the distance"], "sample_ids": ["zgUgkpk78xU", "u0TrcHhkPQ"], "start_seconds": ["70", "20"], "properties": ["horn, bells, ring", "distance, car, speed"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a duck quacks loudly and continuously"], "sample_ids": 
["u7C-AEBQM", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["ticks, rhythmic, quiet", "loud, continuous, quacks"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a ticktock of a clock", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 0}, {"captions": ["people speak as gunfire rings out", "a dog barks and whimpers"], "sample_ids": ["wqTCwqVRDlk", "sShpyu2l4YQ"], "start_seconds": ["80", "0"], "properties": ["gunfire, ring, speak", "barks, whimpers, dog"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "the puppies are playing with a toy"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a dog is barking and growling"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["ylpYOorfH4o", "zl9Dqx-j7q4"], "start_seconds": ["410", "6"], "properties": ["motor, run, steady", "engine, laugh, loud"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a bell rings multiple 
times before a siren sounds in the distance", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["ul60S8TXDA8", "y2bVZ7rz-5M"], "start_seconds": ["60", "280"], "properties": ["sound, distance, bell", "motor noise, horn, siren"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn honking and a siren wailing?", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tK4VlLsNxak", "sSMl2vc3ek"], "start_seconds": ["120", "20"], "properties": ["a, dial, telephone", "loud, multiple, distance"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a person snoring loudly"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a man speaks followed by another man speaking outside"], "sample_ids": ["uEU-Hg5MTN8", "viuTg1M-dqg"], "start_seconds": ["27", "30"], "properties": ["a woman, laughs, animal", "two men, speak, follow"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["ylpYOorfH4o", "tdWhHV3X25Q"], "start_seconds": ["410", 
"60"], "properties": ["engine, run, loud", "applause, audience, yells"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "small dogs yip and bark sharply"], "sample_ids": ["vJ7JPEFhyLA", "v-wcQf4BDY0"], "start_seconds": ["16", "120"], "properties": ["three men, wind, flow", "bark, yip, sharply"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tQWGZLItBXk", "zl9Dqx-j7q4"], "start_seconds": ["170", "6"], "properties": ["voice, music, whoosh", "engine, laugh, loud"], "captions_pred_video": ["worms revolution screenshots", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a child speaks music 
plays video game sounds sound effects and sound effects play ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "three men talk while wind blows and some liquid flows"], "sample_ids": ["siJFXfGWgDk", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["a, bird, vehicle", "three men, wind, flow"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more moving parts", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["zj2R0XoFr5k", "ukg5L09Wpvo"], "start_seconds": ["50", "150"], "properties": ["airplane, boy, fly", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "a telephone rings followed by a woman talking"], "sample_ids": ["vuUVPzd2FXw", "tGcFnX0GHI"], "start_seconds": ["160", "0"], "properties": ["a, steam, release", "ring, talk, woman"], "captions_pred_video": ["of the person cooking on the grill with a spatula", null], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 
1}, {"captions": ["a man speaks as a machine runs", "a man speaks while a rooster crows and other people speak in the background"], "sample_ids": ["vD6lYD1l0BY", "wz7N8YRy74I"], "start_seconds": ["330", "30"], "properties": ["a, machine, run", "rooster, crow, background, people"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a clock ticktocks"], "sample_ids": ["tK4VlLsNxak", "v-g-j2uTByM"], "start_seconds": ["120", "30"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "ticktocks, clock, ticktocks"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "wind blows as people chatter quietly"], "sample_ids": ["y2ZBGpgbhHM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["animal, growl, bird", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["children speak as a female ask them questions", "a car accelerates and wind blows"], "sample_ids": ["wEBlkGWVWwE", "u0TrcHhkPQ"], "start_seconds": ["260", "20"], "properties": ["female, speak, questions", "accelerates, wind, blows"], "captions_pred_video": ["shows a person 
writing on the whiteboard", null], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a race car accelerates and revs its engine "], "question": "which is not a person", "label": 0}, {"captions": ["electronic beeps occur in a short series", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["y682ml90jGw", "y2bVZ7rz-5M"], "start_seconds": ["11", "280"], "properties": ["beeps, series, electronic", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a beeping sound is being made ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a warning", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["w2JXXIAdUdg", "zFjIWfSD-4"], "start_seconds": ["10", "410"], "properties": ["snoring, distance, person", "People, motor, brakes"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", null], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a person speaking indiscriminately in the distance?", "label": 0}, {"captions": ["a woman speaks with water running", "an airplane engine spools and people speak"], "sample_ids": ["wTideSjRFS0", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["water, running, woman", "airplane, engine, spool"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a jet engine is running and people are talking"], "question": "which entity is a video of 
a woman speaking with water running?", "label": 0}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["vlS6YMeWAPo", "y2bVZ7rz-5M"], "start_seconds": ["40", "280"], "properties": ["sheep, baa, birds", "motor noise, horn, siren"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a goat bleats and birds chirp", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "a motorcycle engine works nearby"], "sample_ids": ["xSKJGCItUWE", "tOSWIURC-4"], "start_seconds": ["10", "0"], "properties": ["engine, work, child", "engine, work, nearby"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a lawn mower is running "], "question": "which engine works nearby", "label": 1}, {"captions": ["a man speaks as a car is passing by", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sK4u5T8hW78", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["a, car, pass", "multiple, people, yell"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a crowd of people are 
talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a male speaks and another male speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["viuTg1M-dqg", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["two males, speaking, male", "music, gunfire, explosion"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more action", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a car speeding up in the distance"], "sample_ids": ["xKB8O8LTs6s", "u0TrcHhkPQ"], "start_seconds": ["70", "20"], "properties": ["music, gunshots, explosion", "distance, car, speed"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "dishes cling together then a man begins to speak"], "sample_ids": ["xzKKf9bKNUo", "sQGXqGcwOTc"], "start_seconds": ["10", "3"], "properties": ["background, noise, snoring", "cling, speak, dishes"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a person snoring loudly", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["a stream of water runs briefly", "water flows as men speak and yell"], "sample_ids": ["x-PeY8Yb8M4", "vJ7JPEFhyLA"], 
"start_seconds": ["300", "16"], "properties": ["stream, water, run", "water, flow, men"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows water flowing?", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "pigeons vocalize and birds chirp"], "sample_ids": ["sTpirNYo8vQ", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["a, tone, fast", "vocalize, bird, chirp"], "captions_pred_video": ["of a man taking a selfie on a bus", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["scraping and female speech with distant music", "a man speaks uses a drill"], "sample_ids": ["yHeVV-xeOxQ", "x5eIC7S0fbg"], "start_seconds": ["130", "60"], "properties": ["female, speech, music", "A man is speaking, uses a drill, and is a tool"], "captions_pred_video": ["of a girl milking a goat's udder", "a person in surgical gloves is using a needle to remove a small object from a tooth"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a man is speaking and using a power tool "], "question": "which entity is a tool", "label": 1}, {"captions": ["wind blows strongly", "wind blowing followed by a zoom"], "sample_ids": ["w8uLijTqtlU", "vr8ZXjEBhMQ"], "start_seconds": ["70", "150"], "properties": ["wind, blows, strongly", "wind, blow, zoom"], "captions_pred_video": ["footage is blurry and shaky", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["the wind is blowing strongly", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more calm", "label": 1}, 
{"captions": ["wind blows as people chatter quietly", "a toilet flushes and a female speaks"], "sample_ids": ["xBxDz0CFVn0", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["wind, chatter, people", "female, flushes, toilet"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a toilet flushes and a man speaks"], "question": "which entity is more silent", "label": 0}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "a siren comes to life as a horn blares"], "sample_ids": ["slZLHwNbbt4", "u--KhUW8l1Y"], "start_seconds": ["300", "0"], "properties": ["clap, distance, horn", "horn, siren, life"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a firefighter spraying water from a fire hydrant at night"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a fire truck siren blares and a horn blows "], "question": "which entity is a siren", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "a vehicle engine revs and tires squeal"], "sample_ids": ["yJ0TePmaOo", "yDoT73BWsdA"], "start_seconds": ["390", "10"], "properties": ["two hard objects, man, speak", "engine revs, tires squeal, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "a man speaks as a car is passing by"], "sample_ids": ["yYJksgsxx5U", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["audio, woman, silverware", "a, car, pass"], "captions_pred_video": ["of a woman slicing an orange 
on a cutting board", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a man speaks as a car is passing by"], "sample_ids": ["tQWGZLItBXk", "sK4u5T8hW78"], "start_seconds": ["170", "30"], "properties": ["voice, music, whoosh", "a, car, pass"], "captions_pred_video": ["worms revolution screenshots", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a train horn blows as it passes by"], "sample_ids": ["su6FAOcOA8c", "zVacuqSb4LI"], "start_seconds": ["4", "30"], "properties": ["engine, idle, woman", "horn, blows, train"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "by mike amstong 
a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is moving", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["zj2R0XoFr5k", "zl9Dqx-j7q4"], "start_seconds": ["50", "6"], "properties": ["airplane, boy, fly", "engine, laugh, loud"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks uses a drill", "paper folding and crinkling"], "sample_ids": ["x5eIC7S0fbg", "zPpG3RD8lSs"], "start_seconds": ["60", "20"], "properties": ["A man is speaking, uses a drill, and is a tool", "paper, fold, crinkle"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out 
of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and using a power tool ", "the wind blows and a mouse clicks "], "question": "which is a tool", "label": 0}, {"captions": ["loud intermittent buzzing with intermittent laughter", "water splashes as an animal walks through"], "sample_ids": ["sLUnaPT5gM8", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["loud, laughter, intermittent", "animal, water, splashes"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "water splashes and gurgles as people speak"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a small engine idles continuously", "an engine idles consistently before sputtering some"], "sample_ids": ["y5WII6cTH7k", "rwTERCUno"], "start_seconds": ["40", "90"], "properties": ["engine, idle, continuously", "engine, idle, sputter"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", null], "captions_pred_audio": ["an engine is knocking and vibrating ", "an engine is idling and vibrating"], "question": "which engine idles consistently before sputtering some", 
"label": 1}, {"captions": ["two men speak as a buffeting wind blows", "a clock ticktocks"], "sample_ids": ["y8WEcpOlT3I", "v-g-j2uTByM"], "start_seconds": ["40", "30"], "properties": ["wind, speak, buffeting", "ticktocks, clock, ticktocks"], "captions_pred_video": ["on how to use a sewing machine youtube", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a heavy rain falls endlessly", "people applaud and hoot and chat quietly"], "sample_ids": ["wP8ZKrlx3oA", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["heavy, rain, fall", "people, applaud, hoot"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", null], "captions_pred_audio": ["a heavy rain is falling on a surface", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be a movie", "label": 1}, {"captions": ["a train horn blows as it passes by", "paper is crumpling consistently"], "sample_ids": ["zVacuqSb4LI", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["horn, blows, train", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a 
video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "water splashes and a door squeaks"], "sample_ids": ["y8dSeubCNI", "sdXV-ylviw"], "start_seconds": ["4", "190"], "properties": ["men, women, car", "sound, splash, door"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine revving and people talking in the background", "a dog barks and taps with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vddP56-ogds", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["water, splash, person, laugh", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "a car accelerates and wind blows"], "sample_ids": ["sofxkNWaP0s", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["wind, engine, louder", "accelerates, wind, blows"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", null], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a small engine spits as it runs", "loud 
intermittent buzzing with intermittent laughter"], "sample_ids": ["sZvwOuuPGP0", "sLUnaPT5gM8"], "start_seconds": ["50", "0"], "properties": ["spits, engine, runs", "loud, laughter, intermittent"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a medium engine is running ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more like a person", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a duck quacks loudly and continuously"], "sample_ids": ["ylpYOorfH4o", "vh30P49Po6s"], "start_seconds": ["410", "30"], "properties": ["motor, run, steady", "loud, continuous, quacks"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a door slams shut roughly"], "sample_ids": ["vbr9mHKc8WM", "zkKdxzNC97Y"], "start_seconds": ["40", "27"], "properties": ["noise, loudness, engine", "a door, slams, shut"], "captions_pred_video": [null, "footage of the door opening and closing in 
slow motion"], "captions_pred_audio": ["an engine is idling", "a door is opened and closed"], "question": "which entity is louder", "label": 1}, {"captions": ["people speak and tapping occurs", "plastic is tapped on while someone speaks"], "sample_ids": ["tFCUUGdREgA", "wvKpEYswXO0"], "start_seconds": ["70", "150"], "properties": ["people, tap, speak", "plastic, tap, speak"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "water is sprayed across a hard surface"], "sample_ids": ["uWAAAL4CIoc", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["a woman, chirps, animal", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "wind blowing followed by a zoom"], "sample_ids": ["xfaoyyzw2WU", "vr8ZXjEBhMQ"], "start_seconds": ["180", "150"], "properties": ["loud, jet engine, roar", "wind, blow, zoom"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "wind blows and a chainsaw cuts through wood "], "question": "which is not loud", "label": 1}, {"captions": ["a man woman speak while crickets sing", "pigeons vocalize and birds chirp"], "sample_ids": ["zTLVJCo4WEE", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["a, crickets, sing", "vocalize, bird, 
chirp"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "of the pigeon in the cage"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "metal clacking as food and oil sizzles followed by a woman talking"], "sample_ids": ["wAAkbZToh8", "vW4x7S1VfQc"], "start_seconds": ["0", "150"], "properties": ["burp, laugh, speak", "clacking, oil, woman"], "captions_pred_video": [null, "footage of a person cooking fish in a frying pan on a stove top"], "captions_pred_audio": ["a man burps and a woman speaks", "food sizzles in a frying pan"], "question": "which entity is a person", "label": 0}, {"captions": ["multiple adults speaking, and a child shouting in the background", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yks4cLgIDMc", "zl9Dqx-j7q4"], "start_seconds": ["170", "6"], "properties": ["background, speaking, child", "engine, laugh, loud"], "captions_pred_video": ["footage of two kids wrestling on the floor", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and a child is crying", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "wind blows as people chatter quietly"], "sample_ids": ["yNtRmrn0io8", "xBxDz0CFVn0"], "start_seconds": ["210", "30"], "properties": ["storm, distance, strike", "wind, chatter, people"], "captions_pred_video": ["footage of a house in the middle of the night", "footage is blurry and out of focus"], "captions_pred_audio": ["rain falls and thunder roars", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a toilet flushes and water drains", "an infant crying frantically"], "sample_ids": ["sfAvvZwdLCY", "zwOBqeFTgiU"], 
"start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "cry, infant, frantically"], "captions_pred_video": ["footage of the toilet in the bathroom", "of the baby crying in the car seat"], "captions_pred_audio": ["a toilet is flushed", "a baby cries loudly"], "question": "which entity is a human", "label": 1}, {"captions": ["a baby cries and a woman speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["tMbMDvT50j8", "tDVADusiIoc"], "start_seconds": ["12", "60"], "properties": ["a, cry, woman", "water, radio, man"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["multiple birds vocalize and wind blows", "a woman speaks happily and an animal chirps"], "sample_ids": ["uoGVs9yUqY4", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["multiple, vocalize, wind", "a woman, chirps, animal"], "captions_pred_video": ["for how to make a wooden shed door youtube", null], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["xZepNM9qcRA", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["background, motor, run", "engine, revs, vehicle"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a race car accelerates and revs its 
engine "], "question": "which entity has a vehicle passing by?", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "water splashes as an animal walks through"], "sample_ids": ["uzQnlJXBbOM", "w1ir-sZ3Im8"], "start_seconds": ["50", "90"], "properties": ["ringing, beep, stop", "animal, water, splashes"], "captions_pred_video": ["footage of a person using a cell phone on a table", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a telephone rings and a man speaks", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "vehicles pass by on a roadway"], "sample_ids": ["y2bVZ7rz-5M", "tgbONvsP47Y"], "start_seconds": ["280", "0"], "properties": ["engine, horn, siren", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a car is driving on the road "], "question": "which vehicle is moving", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "some tunes played by whistling"], "sample_ids": ["su6FAOcOA8c", "u6BnG6YZqJ4"], "start_seconds": ["4", "0"], "properties": ["engine, run, woman", "tune, play, whistling"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["uJV8NDaHqqk", "su6FAOcOA8c"], "start_seconds": 
["100", "4"], "properties": ["loud, fly, chirp", "engine, idle, woman"], "captions_pred_video": ["a bee hive in a wooden box", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a swarm of bees buzzing around", "a woman is speaking and a subway train is moving "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "a man speaks over intermittent keyboard taps"], "sample_ids": ["t69a8aRKhmc", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["a, b, c", "audio, man, keyboard"], "captions_pred_video": ["footage is blurry and out of focus", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a man speaks followed by another man speaking outside"], "sample_ids": ["u21-Z5gJCB8", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["background, voice, man", "two men, speak, follow"], 
"captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking with another voice speaking in the background?", "label": 0}, {"captions": ["a male speaks over some small clicks", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["uXxVebHsGZ8", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["male, clicks, speak", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a man speaks as a car is passing by"], "sample_ids": ["zY3icUyMdh8", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "a, car, pass"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more passive", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["y8dSeubCNI", "vlS6YMeWAPo"], "start_seconds": ["4", "40"], "properties": ["men, women, car", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["an engine revving and people talking in the background", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "winds blows roughly as a vehicle races past"], "sample_ids": ["yks4cLgIDMc", "xjvTpk2Zpr8"], "start_seconds": ["170", "70"], "properties": ["background, speaking, child", "wind, blows, vehicle"], "captions_pred_video": ["footage of two kids wrestling on the floor", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and a child is crying", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["xjhAnI2q6hM", "vfYTJq7nU"], "start_seconds": ["6", "130"], "properties": ["engine revs, vehicle, people", "rustling, ducks, quack"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", null], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a duck quacks and a woman speaks"], 
"question": "which entity is about animals?", "label": 1}, {"captions": ["a church bell rings several times", "someone is typing on a computer keyboard"], "sample_ids": ["sUVVjE3Ucp8", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["ring, bell, several", "keyboard, type, computer"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "how to make money on youtube in spanish"], "captions_pred_audio": ["a church bell is ringing ", "a person is typing on a keyboard"], "question": "which is not a type of computer", "label": 0}, {"captions": ["birds chirp quietly and an adult man speaks", "an engine runs loudly"], "sample_ids": ["zuua6-5goWw", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["birds, chirp, quiet, man, speaks", "loud, engine, run"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "footage is blurry because it's raining outside"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a lawn mower is running and men are speaking "], "question": "which is quieter", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "paper is crumpling consistently"], "sample_ids": ["uzQnlJXBbOM", "v5cSxLaHADY"], "start_seconds": ["50", "0"], "properties": ["ringing, beep, stop", "paper is crumpling, paper is white, paper is 
crumpling"], "captions_pred_video": ["footage of a person using a cell phone on a table", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a telephone rings and a man speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tw76HGONaKg", "xfaoyyzw2WU"], "start_seconds": ["570", "180"], "properties": ["A, game, keyboard", "loud, jet engine, roar"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "several insects fly while two men talk"], "sample_ids": ["vdoxuJn9lTc", "s-T9OVOiMLo"], "start_seconds": ["40", "330"], "properties": ["burp, loud, girl", "several, fly, men"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "a man climbing up 
a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a girl speaking?", "label": 0}, {"captions": ["a vehicle engine revs as the vehicle passes", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["yDoT73BWsdA", "wqZ135Ssz0"], "start_seconds": ["10", "60"], "properties": ["engine, revs, vehicle", "two men, woman, birds"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a child speaks in closed space"], "sample_ids": ["xfudFO976zE", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["animal, bleats, cry", "child, space, speak"], "captions_pred_video": ["footage is blurry and shaky", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["paper is crumpling consistently", "a car accelerates and wind blows"], "sample_ids": ["v5cSxLaHADY", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "accelerates, wind, blows"], "captions_pred_video": ["footage of the person holding a pair of scissors", null], "captions_pred_audio": ["paper is crumpled and crinkled", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "a man speaks as a motor runs in the 
background"], "sample_ids": ["vJ7JPEFhyLA", "xZepNM9qcRA"], "start_seconds": ["16", "30"], "properties": ["three men, wind, flow", "background, motor, run"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["continuous sneezing together with speech", "frogs croak and vocalize"], "sample_ids": ["x4dZyf9Gbj0", "yswmmRZFItk"], "start_seconds": ["130", "0"], "properties": ["continuous, sneeze, speech", "croak, vocalize, frog"], "captions_pred_video": ["footage is blurry and out of focus", "a close up of a frog in the water"], "captions_pred_audio": ["a woman sneezes and speaks", "a frog is croaking"], "question": "which entity is a frog?", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "a horn rings out as a machine runs by"], "sample_ids": ["uZesmtKZGSw", "slZLHwNbbt4"], "start_seconds": ["250", "300"], "properties": ["car, track, man", "a, horn, run"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a train is moving and blowing 
its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "someone snores nearby"], "sample_ids": ["vJ7JPEFhyLA", "spJCm8tD9Zo"], "start_seconds": ["16", "90"], "properties": ["three men, wind, flow", "someone snores, nearby, someone"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["sWZzXuWYY", "vbZ-0lGPneg"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman is speaking and a dog is whimpering"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "someone whistles a tune"], "sample_ids": ["uqFtmnhuqA8", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["a, b, c", "someone, tune, whistle"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", null], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "a person snores loudly multiple times at a close distance"], "sample_ids": ["xjvTpk2Zpr8", "sSMl2vc3ek"], "start_seconds": ["70", "20"], "properties": ["wind, blows, vehicle", "loud, multiple, 
distance"], "captions_pred_video": ["footage of a dhl plane landing on the runway", null], "captions_pred_audio": ["a jet engine roars and wind blows ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a toilet flushes and a female speaks"], "sample_ids": ["w0xsN8X18Y", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["music, surface, rain", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a toilet flushes and a man speaks"], "question": "which entity is more likely to be in a bathroom?", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["xM4joTqDVp4", "tdWhHV3X25Q"], "start_seconds": ["160", "60"], "properties": ["background, chirp, birds", "applause, audience, yells"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "a duck quacks continuously"], "sample_ids": ["un9VQlzgZM", "vh30P49Po6s"], "start_seconds": ["5", "30"], "properties": ["females, talk, laugh", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a male speaks and another male 
speaks", "small dogs yip and bark sharply"], "sample_ids": ["viuTg1M-dqg", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["two males, speaking, male", "bark, yip, sharply"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "birds chirp and objects are moved around"], "sample_ids": ["yYJksgsxx5U", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["audio, woman, silverware", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "insects buzz and a man speaks"], "question": "which entity is a video?", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a car accelerates and wind blows"], "sample_ids": ["tEE3MpBt1sg", "u0TrcHhkPQ"], "start_seconds": ["50", "20"], "properties": ["drill, something, laugh", "accelerates, wind, blows"], "captions_pred_video": ["footage is blurry due to the smoke in the air", null], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "people applaud and hoot and chat quietly"], "sample_ids": ["xKB8O8LTs6s", "wwyfGO2J4"], "start_seconds": ["70", "90"], "properties": ["music, gunfire, explosion", "people, applaud, hoot"], "captions_pred_video": ["in your own words a screenshot of 
the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xyL9F5VrjkE", "zj2R0XoFr5k"], "start_seconds": ["20", "50"], "properties": ["engine, run, wind", "airplane, boy, fly"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man is filing a hard object", "cats meow and then a person begins to talk while the cats continue to meow"], "sample_ids": ["vveS8HT7Uog", "x5cuQjOdM3E"], "start_seconds": ["100", "30"], "properties": ["a man, hard, object", "cat, talk, meow"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a cat meows and a woman speaks"], "question": "which entity is talking", "label": 1}, {"captions": ["a motorcycle engine is idling", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vZAqdHZ81yA", "zj2R0XoFr5k"], "start_seconds": ["180", "50"], "properties": ["engine, motorcycle, idling", "airplane, boy, fly"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["an engine is idling 
loudly", "a woman speaks while a helicopter flies overhead "], "question": "which entity is moving", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "a car accelerates and wind blows"], "sample_ids": ["wqUmIEzuNz4", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["frog, bird, vocalize", "accelerates, wind, blows"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", null], "captions_pred_audio": ["a cat meows and rustles", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zTLVJCo4WEE", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["a, crickets, sing", "gun, shoot, water"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a consistent ticking pattern"], "sample_ids": ["vdoxuJn9lTc", "sCeWURVHfOM"], "start_seconds": ["40", "30"], "properties": ["burp, loud, girl", "ticking, pattern, clock"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "- a close-up view of the clock's inner workings"], "captions_pred_audio": ["a child speaks followed by a burp", "ticking of a clock"], "question": "which entity is a clock?", "label": 1}, {"captions": ["water flows followed by women screaming", "a vehicle engine accelerating then running on idle"], "sample_ids": ["w5W5Kqtc8E", "vYkA3cfXp5Q"], "start_seconds": ["100", "30"], "properties": ["water, flow, women", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on 
a sunny day"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "an aircraft engine runs"], "sample_ids": ["vddP56-ogds", "yLCORCnd35Q"], "start_seconds": ["30", "0"], "properties": ["water, flow, laugh", "engine, aircraft, runs"], "captions_pred_video": [null, "a lufthansa airbus a380 landing at london's heathrow airport"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a train is moving and its wheels are squealing "], "question": "which entity is a moving object", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sxYkFKFIZD0", "zl9Dqx-j7q4"], "start_seconds": ["20", "6"], "properties": ["screech, man, door", "engine, laugh, loud"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a machine beeps continuously"], "sample_ids": ["zofjfKhqLk8", "y682ml90jGw"], "start_seconds": ["10", "11"], "properties": ["background, metal, clank", "beeps, machine, continuously"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": 
["a large engine is running and a bell is ringing", "a beeping sound is being made "], "question": "which machine beeps continuously", "label": 1}, {"captions": ["an engine starts and increases in power", "an airplane engine runs"], "sample_ids": ["zjTG0gaGCUI", "yVPZ2MNWpms"], "start_seconds": ["80", "0"], "properties": ["power, increase, engine", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "small dogs yip and bark sharply"], "sample_ids": ["vbZ-0lGPneg", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["a woman, a television program, a bird", "bark, yip, sharply"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "paper is crumpling consistently"], "sample_ids": ["slZLHwNbbt4", "v5cSxLaHADY"], "start_seconds": ["300", "0"], "properties": ["train, horn, sound", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a door opens and birds chirp", "a woman speaks as she rubs two objects together"], "sample_ids": ["yeFvk9x0wWI", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": 
["door, open, birds", "two objects, woman, speak"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["frogs croak and vocalize", "frogs croak and vocalize"], "sample_ids": ["yswmmRZFItk", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["croak, vocalize, frog", "croak, vocalize, frog"], "captions_pred_video": ["a close up of a frog in the water", "a close up of a frog in the water"], "captions_pred_audio": ["a frog is croaking", "a frog is croaking"], "question": "which frog is croaking", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a female speaks softly as paper crinkles"], "sample_ids": ["w0xsN8X18Y", "xvDdE3zNf8Y"], "start_seconds": ["30", "120"], "properties": ["music, surface, rain", "a, female, speaks"], "captions_pred_video": [null, "of a woman in a white shirt and glasses 
holding a purple tie"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a woman speaks and crumples paper"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["vBslzh7saPw", "t25U-v4k4ts"], "start_seconds": ["90", "40"], "properties": ["engine, roar, louder", "a, chirps, bird"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking and bees are buzzing"], "question": "which entity is quieter", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "people cheer as a vehicle engine revs"], "sample_ids": ["yYEVLuqEytU", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["grunt, slurp, background", "engine revs, vehicle, people"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a car speeding up in the distance"], "sample_ids": ["s4Uz1Ffgo04", "u0TrcHhkPQ"], "start_seconds": ["100", "20"], "properties": ["roars, background, people speaking", "distance, car, speed"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a race car accelerates and revs its engine "], "question": "which car is speeding up in the distance", "label": 1}, {"captions": ["an animal bleats and cries out 
and metal bangs", "paper is crumpling consistently"], "sample_ids": ["xfudFO976zE", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["animal, bleats, cry", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage is blurry and shaky", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a woman and man speak while food is frying", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["zk-xJGQU8-4", "uZesmtKZGSw"], "start_seconds": ["130", "250"], "properties": ["food, man, woman", "men, talk, cars"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about a man and woman speaking?", "label": 0}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "winds blows roughly as a vehicle races past"], "sample_ids": ["w9lpbUn0hPc", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["male, wind, rustling", "wind, blows, vehicle"], "captions_pred_video": ["footage of a man in a black shirt 
standing in front of a white truck in a parking lot", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a jet engine roars and wind blows "], "question": "which entity is about a vehicle racing past?", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["x9JovgqUcs", "uYT5gxnyMWM"], "start_seconds": ["500", "50"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking and typing on a computer keyboard?", "label": 0}, {"captions": ["two men speak as a buffeting wind blows", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["y8WEcpOlT3I", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["wind, speak, buffeting", "People, motor, brakes"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is more likely to be in a car?", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "a horn rings out as a machine runs by"], "sample_ids": ["sOa7g-44Dag", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["background, man, spray", "a, horn, run"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking and rubbing his hands together 
", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is about a machine running by?", "label": 1}, {"captions": ["someone snores nearby", "water splashes and a motorboat passes as people yell"], "sample_ids": ["spJCm8tD9Zo", "w5W5Kqtc8E"], "start_seconds": ["90", "100"], "properties": ["someone snores, nearby, someone", "water, splashes, motorboat"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is more active", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a woman speaks in a fast tone with a male"], "sample_ids": ["uYT5gxnyMWM", "sTpirNYo8vQ"], "start_seconds": ["50", "30"], "properties": ["a, scream, girl", "a, tone, fast"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a man taking a selfie on a bus"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking while a car is revving and accelerating "], "question": "which entity is more calm", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["v7jJS8aAyA", "yDoT73BWsdA"], "start_seconds": ["10", "10"], "properties": ["wind, blows, loudly", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["goats bleat and metal clings", "an infant crying frantically"], "sample_ids": ["tH17JPjDPnc", "zwOBqeFTgiU"], "start_seconds": ["260", "30"], "properties": ["bleat, metal, clings", "cry, infant, frantically"], "captions_pred_video": ["feed of the goats eating 
hay in the barn", "of the baby crying in the car seat"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a person is snoring while sleeping", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vJrjSeP17yE", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["a person is sleeping, snoring, person", "rustling, ducks, quack"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a duck quacks and a woman speaks"], "question": "which entity is a video", "label": 1}, {"captions": ["a horse runs while two women talk", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sdvI1mHAsc", "su6FAOcOA8c"], "start_seconds": ["20", "4"], "properties": ["two women, horse, run", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a woman is speaking and a subway train is moving "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "a woman and man are speaking"], "sample_ids": ["tw76HGONaKg", "vbpKkWvfOu4"], "start_seconds": ["570", "560"], "properties": ["A, game, keyboard", "two people, speaking, woman, man"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda 
ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a woman is speaking and a man is speaking"], "question": "which entity shows two people speaking", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "continuous snoring"], "sample_ids": ["sapQIQUhFc", "sLkeqCDJIyw"], "start_seconds": ["280", "120"], "properties": ["liquid, flow, distance", "loud, snoring, noise"], "captions_pred_video": [null, ", what is the man doing on the couch? 
sleeping"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a person is snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["electronic beeps occur in a short series", "some tunes played by whistling"], "sample_ids": ["y682ml90jGw", "u6BnG6YZqJ4"], "start_seconds": ["11", "0"], "properties": ["beeps, series, electronic", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a beeping sound is being made ", "a person whistling a song"], "question": "which entity is played by whistling", "label": 1}, {"captions": ["children speak and play together", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["yVVP8XvWJTo", "tdWhHV3X25Q"], "start_seconds": ["260", "60"], "properties": ["children, speak, play", "applause, audience, yells"], "captions_pred_video": ["footage of a playground at a school or daycare center", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["s4Uz1Ffgo04", "tDlysoZiA1I"], "start_seconds": ["100", "0"], "properties": ["roars, background, people speaking", "animal, grunts, chirps"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "birds are chirping and a rooster is crowing "], "question": "which entity is quieter", "label": 1}, {"captions": ["a helicopter engine runs", "a vehicle engine runs while a siren and horn 
sound"], "sample_ids": ["t5ZbXbniOWk", "u--KhUW8l1Y"], "start_seconds": ["30", "0"], "properties": ["engine, helicopter, run", "engine, sound, horn"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "a firefighter spraying water from a fire hydrant at night"], "captions_pred_audio": ["a helicopter is flying overhead ", "a fire truck siren blares and a horn blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "a man talks while metallic objects are rapped and steam is released"], "sample_ids": ["un9VQlzgZM", "vuUVPzd2FXw"], "start_seconds": ["5", "160"], "properties": ["females, talk, laugh", "a, steam, release"], "captions_pred_video": [null, "of the person cooking on the grill with a spatula"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a man is speaking and dishes are clanging"], "question": "which entity is a man?", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["vKrYfzleLB8", "vlS6YMeWAPo"], "start_seconds": ["110", "40"], "properties": ["a, ring, gunshots", "sheep, baa, birds"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wqZ135Ssz0", "zj2R0XoFr5k"], "start_seconds": ["60", "50"], "properties": ["two men, woman, birds", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane 
flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about flying?", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "wind blowing followed by a zoom"], "sample_ids": ["tQWGZLItBXk", "vr8ZXjEBhMQ"], "start_seconds": ["170", "150"], "properties": ["voice, music, whoosh", "wind, blow, zoom"], "captions_pred_video": ["worms revolution screenshots", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more like a natural phenomenon", "label": 1}, {"captions": ["paper folding and crinkling", "a man speaks while water drains"], "sample_ids": ["zPpG3RD8lSs", "vSeGhaZt-aI"], "start_seconds": ["20", "50"], "properties": ["paper, fold, crinkle", "water, drain, man"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "a man in a kitchen preparing a 
smoothie with a blender"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a video", "label": 1}, {"captions": ["a goat bleats as a person speaks", "water splashes as an animal walks through"], "sample_ids": ["tPJvjq9QePY", "w1ir-sZ3Im8"], "start_seconds": ["40", "90"], "properties": ["bleats, person, speak", "animal, water, splashes"], "captions_pred_video": ["a dog and a sheep in a barn", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a baby cries and a man speaks", "water splashes and gurgles as people speak"], "question": "which entity is a video of an animal walking through water?", "label": 1}, {"captions": ["an infant crying as a woman laughs", "someone is typing on a computer keyboard"], "sample_ids": ["xhmRY9yhC7c", "v0x1odnXtP0"], "start_seconds": ["20", "210"], "properties": ["a, laugh, infant", "keyboard, type, computer"], "captions_pred_video": ["of a baby crying in a baby bouncer", "how to make money on youtube in spanish"], "captions_pred_audio": ["a baby cries and a woman speaks", "a person is typing on a keyboard"], "question": "which is not a person", "label": 0}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a motorcycle engine works nearby"], "sample_ids": ["rwtmaKiCcQU", "tOSWIURC-4"], "start_seconds": ["30", "0"], "properties": ["nozzle, depressed, spray can", "engine, work, nearby"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", null], "captions_pred_audio": ["spraying and people speaking", "a lawn mower is running "], "question": "which entity is a machine", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["uZesmtKZGSw", "wSVhSdj0F0"], "start_seconds": ["250", "10"], "properties": ["men, talk, cars", "horn 
honks, keys jingle, electronic beep"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", null], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a car horn honks and keys jangle with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a car accelerates and wind blows"], "sample_ids": ["vf9xf3vMsGM", "u0TrcHhkPQ"], "start_seconds": ["540", "20"], "properties": ["A man speaks while turning a water faucet on.", "accelerates, wind, blows"], "captions_pred_video": ["of the person washing their hands under the faucet", null], "captions_pred_audio": ["a man is speaking while water is running in the background", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks with water running", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["wTideSjRFS0", "t97k0cejSQE"], "start_seconds": ["30", "250"], "properties": ["water, running, woman", "sound, chirp, buzz"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a bee on a purple thistle flower"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a bee buzzes and a woman speaks"], "question": "which entity has a woman speaking with water running?", "label": 0}, {"captions": ["continuous chugging with birds chirping 
in the background", "propeller rearing loudly with some male and female voices interspersed in the background"], "sample_ids": ["xM4joTqDVp4", "vqZuVbG6-HI"], "start_seconds": ["160", "130"], "properties": ["background, chirp, birds", "background, male, female"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage is blurry because it's raining outside"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a lawn mower is running and men are speaking "], "question": "which entity has a male and female voice in the background?", "label": 1}, {"captions": ["a door slams shut roughly", "someone snores nearby"], "sample_ids": ["zkKdxzNC97Y", "spJCm8tD9Zo"], "start_seconds": ["27", "90"], "properties": ["a door, slams, shut", "someone snores, nearby, someone"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a door is opened and closed", "a person is snoring loudly"], "question": "which entity is more annoying", "label": 1}, {"captions": ["small dogs yip and bark sharply", "pigeons vocalize and birds chirp"], "sample_ids": ["v-wcQf4BDY0", "uiS58TNyUiw"], "start_seconds": ["120", "430"], "properties": ["bark, yip, sharply", "vocalize, bird, chirp"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "of the pigeon in the cage"], "captions_pred_audio": ["a dog barks and growls", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wSVhSdj0F0", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["horn honks, keys jingle, slam", "a woman, laughs, animal"], "captions_pred_video": [null, "of a 
girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be in a car", "label": 0}, {"captions": ["a woman speaks and then a man speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vbpKkWvfOu4", "sSMl2vc3ek"], "start_seconds": ["560", "20"], "properties": ["a, man, speaks", "loud, multiple, distance"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "birds chirp and objects are moved around"], "sample_ids": ["vK93VuO0yNc", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["male voice, bus, rumble", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a man talks as several small engines run", "an infant crying as a woman laughs"], "sample_ids": ["u9A6VZQCZpU", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["a, man, talk", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a baby cries and a woman speaks"], 
"question": "which entity is a person", "label": 1}, {"captions": ["a door opens and closes", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vBHyYJ8pL0", "xKB8O8LTs6s"], "start_seconds": ["2", "70"], "properties": ["open, close, door", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "someone snores nearby"], "sample_ids": ["sa6TLVbooCc", "spJCm8tD9Zo"], "start_seconds": ["240", "90"], "properties": ["people, laugh, child", "someone snores, nearby, someone"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["an animal quacks rapidly", "a toilet flushes and a female speaks"], "sample_ids": ["vh30P49Po6s", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["animal, quacks, rapidly", "female, flushes, toilet"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage is blurry and out of focus"], "captions_pred_audio": ["a duck is quacking loudly", "a toilet flushes and a man speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "a music is played followed by a frog croaking and then music is played again"], "sample_ids": ["viuTg1M-dqg", "voJh2gJxXhA"], "start_seconds": ["30", "50"], "properties": ["two men, speak, follow", "music, frog, croak"], "captions_pred_video": ["footage of 
water coming out of a hole in the ground", "a frog on a black background with a red diamond in the center"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "music is playing and crickets are chirping "], "question": "which entity is a frog?", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "vehicles pass by on a roadway"], "sample_ids": ["tDlysoZiA1I", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["animal, grunts, chirps", "pass, vehicle, roadway"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a car is driving on the road "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a male speaks over some small clicks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["uXxVebHsGZ8", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["male, clicks, speak", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "birds chirp and objects are moved around"], "sample_ids": ["sQGXqGcwOTc", "yPUYU6t3rwo"], "start_seconds": ["3", "370"], "properties": ["cling, speak, dishes", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a door opens and closes", "a male is speaking and a duck quacks as 
others laugh"], "sample_ids": ["vBHyYJ8pL0", "tiDFTC-5vU"], "start_seconds": ["2", "30"], "properties": ["open, close, door", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["an infant crying as a woman laughs", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["xhmRY9yhC7c", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["a, laugh, infant", "music, gunfire, explosion"], "captions_pred_video": ["of a baby crying in a baby bouncer", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a baby cries and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["frogs croak and vocalize", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["yswmmRZFItk", "ukg5L09Wpvo"], "start_seconds": ["0", "150"], "properties": ["croak, vocalize, frog", "a train, a horn, a bell"], "captions_pred_video": ["a close up of a frog in the water", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a frog is croaking", "a train blows its whistle and blows its horn "], "question": "which entity is a warning device", "label": 1}, {"captions": ["some men converse over an engine running", "water flows and trickles"], "sample_ids": ["sCiy7QS1U", "tB7hWb9gTuQ"], "start_seconds": ["300", "30"], "properties": ["men, converse, engine", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "water is splashing and gurgling"], "question": "which entity is 
moving", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "water quietly rushes by while birds chirp in the background"], "sample_ids": ["sG7TyPnFDR0", "sYITalLZjj4"], "start_seconds": ["180", "30"], "properties": ["beeps, machine, smoke alarm", "water, rushes, background, birds"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "two ducks are swimming in the water near each other"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "wind blows and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a propeller rotates loudly and intensely"], "sample_ids": ["sU53zg9Jp7s", "ugHJF0hfYkg"], "start_seconds": ["380", "10"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "loud, intense, propeller"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a man speaks as a car is passing by"], "sample_ids": ["yRx9txMcBl0", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["accelerates, tires, squeals", "a, car, pass"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods 
cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a man speaks as a vehicle engine idles", "winds blows roughly as a vehicle races past"], "sample_ids": ["shmR4OZtzqA", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["man, engine, idle", "wind, blows, vehicle"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man speaks while a motor runs", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "pigeons vocalize and birds chirp"], 
"sample_ids": ["uPDn2BFTHk", "uiS58TNyUiw"], "start_seconds": ["140", "430"], "properties": ["woman, laughs, speaks", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "pigeons vocalize and birds chirp"], "sample_ids": ["vmrxwuAMb2I", "uiS58TNyUiw"], "start_seconds": ["40", "430"], "properties": ["a dog, inhales, exhales", "vocalize, bird, chirp"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "of the pigeon in the cage"], "captions_pred_audio": ["a dog barks and growls", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["goats bleat and people speak", "wind blowing followed by a zoom"], "sample_ids": ["z5iUE5h0EPs", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["goats bleat, people speak, language", "wind, blow, zoom"], "captions_pred_video": ["of the goat in the barn", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a goat bleats and a man speaks", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a frog croaks as other frogs croak in the background"], "sample_ids": ["tDlysoZiA1I", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["animal, grunt, chirp", "background, frog, croak"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "a close up of a frog in the water"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a frog is croaking"], "question": "which entity is a solitary animal?", "label": 0}, {"captions": ["continuous snoring", "a person is burping while a girl speaks"], "sample_ids": ["sLkeqCDJIyw", 
"vdoxuJn9lTc"], "start_seconds": ["120", "40"], "properties": ["loud, snoring, noise", "person, burp, girl"], "captions_pred_video": [", what is the man doing on the couch? sleeping", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["a person is snoring loudly", "a child speaks followed by a burp"], "question": "which noise is louder", "label": 0}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "water splashes as an animal walks through"], "sample_ids": ["xOZfdgAgJ9o", "w1ir-sZ3Im8"], "start_seconds": ["40", "90"], "properties": ["woman, whimpering, speaking", "animal, water, splashes"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "multiple beeps are followed by a squawk and a child speaking"], "sample_ids": ["sLUnaPT5gM8", "w34HjHr6gAY"], "start_seconds": ["0", "30"], "properties": ["loud, laughter, intermittent", "beeps, squawk, child speaking"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a beep sounds followed by a child 
speaking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a vehicle accelerates before a race car idles then accelerates quickly"], "sample_ids": ["ugHJF0hfYkg", "sjlVMgdGSK0"], "start_seconds": ["10", "30"], "properties": ["loud, intense, propeller", "accelerates, vehicle, race car"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car accelerates and revs its engine "], "question": "which is a vehicle", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vKrYfzleLB8", "uYT5gxnyMWM"], "start_seconds": ["110", "50"], "properties": ["a, ring, gunshots", "female, spraying, scream"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person spraying and screaming?", "label": 1}, {"captions": ["a small engine idles continuously", "a person snores loudly multiple times at a close distance"], "sample_ids": ["y5WII6cTH7k", "sSMl2vc3ek"], "start_seconds": ["40", "20"], "properties": ["engine, idle, continuously", "loud, multiple, distance"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", null], "captions_pred_audio": ["an engine is knocking and vibrating ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["goats bleat and people speak", "winds blows roughly as a vehicle races past"], "sample_ids": ["z5iUE5h0EPs", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["goats bleat, people 
speak, language", "wind, blows, vehicle"], "captions_pred_video": ["of the goat in the barn", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a goat bleats and a man speaks", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a man speaks, another man speaks, and a small bell dings"], "sample_ids": ["wEBlkGWVWwE", "t69a8aRKhmc"], "start_seconds": ["260", "30"], "properties": ["a, babble, woman", "a, b, c"], "captions_pred_video": ["shows a person writing on the whiteboard", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking and birds are chirping in the background "], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["people converse in the distance as a clock ticks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vZAw4apG0Es", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["people, clock, converse", "engine, idle, woman"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a clock is ticking and people are talking", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a telephone rings followed by a woman talking"], "sample_ids": ["w8uLijTqtlU", "tGcFnX0GHI"], "start_seconds": ["70", "0"], "properties": ["wind, microphone, noise", "ring, talk, woman"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a recording", "label": 1}, {"captions": 
["a man makes an exclamation, then another man speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["xO-Q2BlIIPU", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["two men, exclamation, speak", "engine, idle, woman"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["tapping occurs then a baby cries", "water is sprayed across a hard surface"], "sample_ids": ["wIJK3-5y0kA", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["a, cry, baby", "water, spray, surface"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a baby cries and a woman speaks", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["yaln9y8I7ms", "w5W5Kqtc8E"], "start_seconds": ["230", "100"], "properties": ["female, flushes, toilet", "wind, blow, vehicle"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and a man speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a telephone rings followed by a woman talking"], "sample_ids": ["uEU-Hg5MTN8", "tGcFnX0GHI"], "start_seconds": ["27", "0"], "properties": ["a woman, laughs, animal", "ring, talk, woman"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], 
"captions_pred_audio": ["a woman is speaking and a baby is crying", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "wind blows as people chatter quietly"], "sample_ids": ["w2JXXIAdUdg", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["snoring, distance, person", "wind, chatter, people"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "footage is blurry and out of focus"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person sniffs and sneezes", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["uRlbY6aoBU", "vfYTJq7nU"], "start_seconds": ["0", "130"], "properties": ["sneezes, person, sniffs", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is sneezing ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "a stream of water runs briefly"], "sample_ids": ["wz7N8YRy74I", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["rooster, crow, background, men", "stream, water, run"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "people converse as a motor runs and air brakes hiss"], 
"sample_ids": ["s3cTDAj31g", "zFjIWfSD-4"], "start_seconds": ["80", "410"], "properties": ["man, talk, woman", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a baby is crying", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a man talking followed by a woman shouting?", "label": 0}, {"captions": ["music plays followed by gunshots and then an explosion", "water flows and trickles"], "sample_ids": ["xKB8O8LTs6s", "tB7hWb9gTuQ"], "start_seconds": ["70", "30"], "properties": ["music, gunshots, explosion", "water, flow, trickle"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "water is splashing and gurgling"], "question": "which entity is more calm", "label": 1}, {"captions": ["a man talks as several small engines run", "dishes cling together then a man begins to speak"], "sample_ids": ["u9A6VZQCZpU", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["a, man, talk", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["continuous sneezing together with speech", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["x4dZyf9Gbj0", "tdWhHV3X25Q"], "start_seconds": ["130", "60"], "properties": ["continuous, sneeze, speech", "applause, audience, yells"], "captions_pred_video": ["footage is blurry and out of focus", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman sneezes and 
speaks", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "a baby coos and fidgets as a lady speaks and laughs"], "sample_ids": ["wudZTNBtVqc", "uPDn2BFTHk"], "start_seconds": ["60", "140"], "properties": ["accelerates, engine, wind", "lady, laugh, baby"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a baby laughs and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a infant makes noise and is excited"], "sample_ids": ["shmR4OZtzqA", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["man, engine, idle", "noise, excited, infant"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man speaks while a motor runs", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["an adult man speaks over glass clinking", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["u6jIvCtKarQ", "tdWhHV3X25Q"], "start_seconds": ["70", "60"], "properties": ["a, man, 
speaks", "applause, audience, yells"], "captions_pred_video": ["footage of a person using a blender on a stove top", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["sa6TLVbooCc", "xjvTpk2Zpr8"], "start_seconds": ["240", "70"], "properties": ["people, laugh, child", "wind, blows, vehicle"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a duck quacks continuously"], "sample_ids": ["uWPRNLnpy7Y", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["accelerate, laugh, vehicle", "quacks, continuously, duck"], "captions_pred_video": ["is taken from a car driving down the street", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "a propeller rotates loudly and intensely"], "sample_ids": ["wAAkbZToh8", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["burp, laugh, speak", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man burps and a woman speaks", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a woman speaks and 
is crumpling paper", "an airplane engine runs"], "sample_ids": ["xvDdE3zNf8Y", "yVPZ2MNWpms"], "start_seconds": ["120", "0"], "properties": ["A, crumple, paper", "engine, airplane, runs"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a woman speaks and crumples paper", "a car is driving by on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a man speaks as a car is passing by"], "sample_ids": ["ziUT9IFTkjg", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["background, birds, rustling", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "a car accelerates and wind blows"], "sample_ids": ["uC9dtII1KDI", "u0TrcHhkPQ"], "start_seconds": ["150", "20"], "properties": ["wind, gusts, distance", "accelerates, wind, blows"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", null], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a race car accelerates and revs its engine "], "question": "which entity is about a car?", "label": 1}, {"captions": ["a frog vocalizes while 
birds chirp", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vMf1dLD6Sng", "zl9Dqx-j7q4"], "start_seconds": ["6", "6"], "properties": ["frog, bird, vocalize", "engine, laugh, loud"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a frog croaks loudly", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "birds chirp and objects are moved around"], "sample_ids": ["yeFvk9x0wWI", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["clack, bird, chirp", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a duck quacks loudly and continuously"], "sample_ids": ["w2M4i1mklOA", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["alarm, gears, turn", "loud, continuous, quacks"], "captions_pred_video": ["footage of an antique clock", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a chime of a clock followed by various tones of ticking with come clinking"], "sample_ids": ["vzxHnu-SFEw", "uqFtmnhuqA8"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "a, b, c"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "shows a clock on the wall of a room with a person standing in front of it"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "mechanisms are ticking and a hammer is striking "], "question": "which entity is a clock?", "label": 1}, {"captions": ["a woman speaks followed by clicks and scraping", "a man speaks over intermittent keyboard taps"], "sample_ids": ["yYJksgsxx5U", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["audio, clicks, scraping", "audio, man, keyboard"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda 
ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a machine beeps continuously"], "sample_ids": ["sWZzXuWYY", "y682ml90jGw"], "start_seconds": ["420", "11"], "properties": ["male, clanks, thumps", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a beeping sound is being made "], "question": "which machine beeps continuously", "label": 1}, {"captions": ["an airplane engine runs", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yVPZ2MNWpms", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["engine, airplane, runs", "a woman, something, fried"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a car is driving by on the road ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of something being fried?", "label": 1}, {"captions": ["an airplane engine spools and people speak", "a man speaks as a motor runs in the background"], "sample_ids": ["wTjoRj1se3U", "xZepNM9qcRA"], "start_seconds": ["390", "30"], "properties": ["airplane, engine, spool", "background, motor, run"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a jet engine is running and people are talking", "a man speaks while a motorcycle revs and accelerates "], "question": 
"which entity has a motor running in the background?", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["w5W5Kqtc8E", "ukg5L09Wpvo"], "start_seconds": ["100", "150"], "properties": ["water, splashes, motorboat", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a helicopter engine idles continuously", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ugHJF0hfYkg", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["engine, idle, continuously", "female, spraying, scream"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wvKpEYswXO0", "sSMl2vc3ek"], "start_seconds": ["150", "20"], "properties": ["plastic, tap, speak", "loud, multiple, distance"], "captions_pred_video": ["of the person preparing food in the kitchen", null], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a motor runs and stops, and animals squawk and croak", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["s4tUs779vBA", "y2bVZ7rz-5M"], "start_seconds": ["160", "280"], "properties": 
["a, sound, stop", "motor noise, horn, siren"], "captions_pred_video": ["game in 10 words or less - screenshot 1", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a car is revving and a man is speaking ", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn honking?", "label": 1}, {"captions": ["a vehicle engine runs as a siren and horn sound", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["u--KhUW8l1Y", "ukg5L09Wpvo"], "start_seconds": ["0", "150"], "properties": ["sound, vehicle, horn", "clickety-clack, train, whistle"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["siJFXfGWgDk", "ukg5L09Wpvo"], "start_seconds": ["50", "150"], "properties": ["man, woman, vehicle", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "an infant crying as a woman laughs"], "sample_ids": ["vXlk0lIQBFo", "xhmRY9yhC7c"], "start_seconds": ["470", "20"], "properties": ["wind, speak, vocalize", "a, laugh, infant"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "of a baby crying 
in a baby bouncer"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a child speaks in closed space", "an engine runs loudly"], "sample_ids": ["yW6FWLSLkx4", "vqZuVbG6-HI"], "start_seconds": ["40", "130"], "properties": ["child, space, speak", "loud, engine, run"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["dogs barking and whimpering", "people speak in a closed space"], "sample_ids": ["tIY7qOV3rEM", "sTpirNYo8vQ"], "start_seconds": ["0", "30"], "properties": ["barking, whimpering, dog", "people, space, speak"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "of a man taking a selfie on a bus"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking while a car is revving and accelerating "], "question": "which entity is more passive", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "paper folding and crinkling"], "sample_ids": ["s4Uz1Ffgo04", "zPpG3RD8lSs"], "start_seconds": ["100", "20"], "properties": ["water, rushes, motorcycle", "paper, fold, crinkle"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a 
valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "the wind blows and a mouse clicks "], "question": "which entity is stationary", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "dog barking and vehicle engine idling followed shortly by vehicle engine revving"], "sample_ids": ["vBHyYJ8pL0", "zY3icUyMdh8"], "start_seconds": ["2", "20"], "properties": ["noise, door, opening", "dog, bark, engine"], "captions_pred_video": [null, "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a car is driving and dogs are barking and squealing "], "question": "which entity is accompanied by a door opening and closing", "label": 0}, {"captions": ["the revving of an engine throttle followed by a man speaking", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tezvROoo4bs", "wqZ135Ssz0"], "start_seconds": ["40", "60"], "properties": ["audio, throttle, speaking", "two men, woman, birds"], "captions_pred_video": ["footage of a busy city street with cars parked on both sides of the road", null], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["an insect buzzes around continuously", "a woman talks while something is fried and 
objects are tapped"], "sample_ids": ["v25l1jef3JY", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["buzzes, continuously, insect", "a woman, something, fried"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a woman speaks happily and an animal chirps"], "sample_ids": ["vimzuGQvdcU", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["a, man, yells", "a woman, chirps, animal"], "captions_pred_video": ["a group of people are rafting down a river", null], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a woman is speaking and a dog is barking "], "question": "which entity is more calm", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vJvryTwuAV8", "uYT5gxnyMWM"], "start_seconds": ["16", "50"], "properties": ["audience, cheer, man", "a, scream, girl"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a woman is speaking and a baby is crying"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a consistent ticking pattern", "water pouring and bubbling"], "sample_ids": ["sCeWURVHfOM", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["ticking, pattern, clock", "water, bubbles, pouring"], "captions_pred_video": ["- a close-up view of the clock's inner workings", "on youtube how to do an afro puff hairstyle youtube how to do an 
afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["ticking of a clock", "water is running from a faucet"], "question": "which entity is more likely to be found in a kitchen", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vqZuVbG6-HI", "uEU-Hg5MTN8"], "start_seconds": ["130", "27"], "properties": ["background, male, female", "animal, grunts, snorts"], "captions_pred_video": ["footage is blurry because it's raining outside", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking and an animal grunts and snorts?", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "an airplane engine runs"], "sample_ids": ["wz7N8YRy74I", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["rooster, crow, background, people", "engine, airplane, runs"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing 
", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "wind blowing followed by a zoom"], "sample_ids": ["vzceMbklWc", "vr8ZXjEBhMQ"], "start_seconds": ["180", "150"], "properties": ["water, faucet, sink", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["water is running and a man is speaking", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "an infant crying as a woman laughs"], "sample_ids": ["s4Uz1Ffgo04", "xhmRY9yhC7c"], "start_seconds": ["100", "20"], "properties": ["water, rushes, vehicle", "a, laugh, infant"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a beep occurs briefly", "three men talk while wind blows and some liquid flows"], "sample_ids": ["xtWeJ56-U-g", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["beep, occur, briefly", "three men, wind, flow"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["an airplane 
engine runs", "people applaud and hoot and chat quietly"], "sample_ids": ["yVPZ2MNWpms", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["engine, airplane, runs", "people, applaud, hoot"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", null], "captions_pred_audio": ["a car is driving by on the road ", "people are clapping and speaking with background noise "], "question": "which entity is a performance", "label": 1}, {"captions": ["a duck quacks several times", "someone is typing on a computer keyboard"], "sample_ids": ["vh30P49Po6s", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["quacks, duck, several", "keyboard, type, computer"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "how to make money on youtube in spanish"], "captions_pred_audio": ["a duck is quacking loudly", "a person is typing on a keyboard"], "question": "which is not a type of keyboard", "label": 0}, {"captions": ["scraping and female speech with distant music", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yHeVV-xeOxQ", "wz7N8YRy74I"], "start_seconds": ["130", "30"], "properties": ["female, speech, music", "rooster, crow, background, men"], "captions_pred_video": ["of a girl milking a goat's udder", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster crow in the background?", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "water splashes and a door squeaks"], "sample_ids": ["wSVhSdj0F0", "sdXV-ylviw"], "start_seconds": ["10", "190"], "properties": ["horn honks, keys jingle, slam", "sound, splash, door"], "captions_pred_video": [null, null], "captions_pred_audio": ["a car horn honks 
and keys jangle with background noise ", "a dog barks and taps with background noise "], "question": "which entity has a door?", "label": 1}, {"captions": ["an animal quacks rapidly", "a woman talks while a baby cries and a man whispers"], "sample_ids": ["vh30P49Po6s", "smDKStoHBJo"], "start_seconds": ["30", "0"], "properties": ["animal, quacks, rapidly", "a, talk, baby, cry"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a man holding a crying baby in his arms"], "captions_pred_audio": ["a duck is quacking loudly", "a baby is crying and a woman is speaking"], "question": "which entity is a human", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sG7TyPnFDR0", "sSMl2vc3ek"], "start_seconds": ["180", "20"], "properties": ["beeps, machine, smoke alarm", "loud, multiple, distance"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", null], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["scraping and female speech with distant music", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["yHeVV-xeOxQ", "ukg5L09Wpvo"], "start_seconds": ["130", "150"], "properties": ["female, speech, music", "clickety-clack, train, whistle"], "captions_pred_video": ["of a girl milking a goat's udder", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a stream of water flows quickly", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["wbHTKEJZyhc", "yDoT73BWsdA"], "start_seconds": 
["20", "10"], "properties": ["stream, water, flow", "engine, revs, vehicle"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["zofjfKhqLk8", "zFjIWfSD-4"], "start_seconds": ["10", "410"], "properties": ["background, metal, clings", "People, motor, brakes"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking while a car is driving and a ticking sound is heard "], "question": 
"which entity is about a saw running?", "label": 0}, {"captions": ["a man speaks as a car is passing by", "wind blowing followed by a zoom"], "sample_ids": ["sK4u5T8hW78", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["a, car, pass", "wind, blow, zoom"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a small engine idles continuously", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["y5WII6cTH7k", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["engine, idle, continuously", "a woman, laughs, animal"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a woman is speaking and a baby is crying"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a woman speaks happily and an animal chirps", "people applaud and hoot and chat quietly"], "sample_ids": ["uWAAAL4CIoc", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["a woman, chirps, animal", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": 
["a machine engine runs and a man speaks", "an infant crying as a woman laughs"], "sample_ids": ["vs65y4qmyBE", "xhmRY9yhC7c"], "start_seconds": ["340", "20"], "properties": ["engine, run, man", "a, laugh, infant"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a infant makes noise and is excited"], "sample_ids": ["vqZuVbG6-HI", "wIJK3-5y0kA"], "start_seconds": ["130", "30"], "properties": ["background, male, female", "noise, excited, infant"], "captions_pred_video": ["footage is blurry because it's raining outside", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "several insects fly while two men talk"], "sample_ids": ["sTpirNYo8vQ", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["a, tone, fast", "several, fly, men"], "captions_pred_video": ["of a man taking a selfie on a bus", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a woman speaking in a fast tone with a male?", "label": 0}, {"captions": ["an engine idles quietly then gradually becomes louder", "wind blows as people chatter quietly"], "sample_ids": ["vbr9mHKc8WM", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["noise, loudness, engine", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry 
and out of focus"], "captions_pred_audio": ["an engine is idling", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a motor runs and stops, and animals squawk and croak", "a propeller rotates loudly and intensely"], "sample_ids": ["s4tUs779vBA", "ugHJF0hfYkg"], "start_seconds": ["160", "10"], "properties": ["a, sound, stop", "loud, intense, propeller"], "captions_pred_video": ["game in 10 words or less - screenshot 1", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a car is revving and a man is speaking ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sjlVMgdGSK0", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["car, revving, loudly", "People, motor, brakes"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which is not a vehicle", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a group of people chatter and talk as multiple horns honk in the background"], "sample_ids": ["wy1eKjR7KC0", "yLy-WycbVVE"], "start_seconds": ["30", "30"], "properties": ["people, talk, distance", "background, people, talk"], "captions_pred_video": ["two police officers riding motorcycles down the street", "a soccer field in a stadium with yellow and red seats"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a man is speaking and a church bell is ringing with wind noise in the background "], "question": "which entity has more people talking", "label": 1}, {"captions": ["a 
child speaks in closed space", "water flows and trickles"], "sample_ids": ["yW6FWLSLkx4", "tB7hWb9gTuQ"], "start_seconds": ["40", "30"], "properties": ["child, space, speak", "water, flow, trickle"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "a motorcycle engine works nearby"], "sample_ids": ["zCrAfDfv6-A", "tOSWIURC-4"], "start_seconds": ["30", "0"], "properties": ["person, mouse, click", "engine, work, nearby"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", null], "captions_pred_audio": ["a person whistles a song", "a lawn mower is running "], "question": "which entity is a person", "label": 0}, {"captions": ["a jet engine spools up and takes off", "a man speaks followed by another man speaking outside"], "sample_ids": ["vBslzh7saPw", "viuTg1M-dqg"], "start_seconds": ["90", "30"], "properties": ["engine, spools, takes", "two men, speak, follow"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["a train horn blows as it passes by", "a steam engine runs and whistles as it passes by"], "sample_ids": ["zVacuqSb4LI", "se87d6yxEOA"], "start_seconds": ["30", "10"], "properties": ["horn, blows, train", "run, whistle, pass"], "captions_pred_video": ["by mike amstong a video by mike amstong a video 
by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of a train passing by a train station with smoke billowing out of the train's smokestack"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a train is moving and blowing its whistle "], "question": "which train is more likely to blow its horn", "label": 0}, {"captions": ["people speak and laugh as a child speaks", "several insects fly while two men talk"], "sample_ids": ["sa6TLVbooCc", "s-T9OVOiMLo"], "start_seconds": ["240", "330"], "properties": ["people, laugh, child", "several, fly, men"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more people", "label": 0}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vBHyYJ8pL0", "xfaoyyzw2WU"], "start_seconds": ["2", "180"], "properties": ["noise, door, opening", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": 
["mechanisms are ticking and a sliding door is opening and closing ", "an aircraft engine roars and a man speaks "], "question": "which noise is louder", "label": 1}, {"captions": ["a person speaks briefly", "a man speaks as a car is passing by"], "sample_ids": ["zOZleIRqZm4", "sK4u5T8hW78"], "start_seconds": ["80", "30"], "properties": ["person, talk, brief", "a, car, pass"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a person talking briefly?", "label": 0}, {"captions": ["an animal bleats and cries out and metal bangs", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["xfudFO976zE", "vlS6YMeWAPo"], "start_seconds": ["0", "40"], "properties": ["animal, bleats, cry", "sheep, baa, birds"], "captions_pred_video": ["footage is blurry and shaky", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a goat bleats and birds chirp"], "question": "which entity is a sheep?", "label": 1}, {"captions": ["a person is whistling", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sIXTftIuUgw", "sLUnaPT5gM8"], "start_seconds": ["90", "0"], "properties": ["person, whistling, person", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], 
"captions_pred_audio": ["a person whistling a song", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "water flows and trickles"], "sample_ids": ["sofxkNWaP0s", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["wind, engine, louder", "water, flow, trickle"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vzxHnu-SFEw", "wqZ135Ssz0"], "start_seconds": ["80", "60"], "properties": ["two objects, woman, speak", "two men, woman, birds"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], 
"captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["people speak softly as food sizzles", "a train horn blows as it passes by"], "sample_ids": ["yhQ2Lg-7qDY", "zVacuqSb4LI"], "start_seconds": ["130", "30"], "properties": ["food, sizzle, speak", "horn, blows, train"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["a woman speaks and is crumpling paper", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["xvDdE3zNf8Y", "xKB8O8LTs6s"], "start_seconds": ["120", "70"], "properties": ["A, crumple, paper", "music, gunfire, explosion"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman speaks and crumples paper", "music plays while a 
woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a person is snoring while sleeping", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vJrjSeP17yE", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["a person is sleeping, snoring, person", "clickety-clack, train, whistle"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a person snoring loudly", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "water pouring and bubbling"], "sample_ids": ["zofjfKhqLk8", "uyRfq-jKPpo"], "start_seconds": ["10", "50"], "properties": ["background, metal, clings", "water, bubbles, pouring"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["birds chirp and an insect 
buzzes around", "people applaud and hoot and chat quietly"], "sample_ids": ["t97k0cejSQE", "wwyfGO2J4"], "start_seconds": ["250", "90"], "properties": ["bird, chirp, insect", "people, applaud, hoot"], "captions_pred_video": ["a bee on a purple thistle flower", null], "captions_pred_audio": ["a bee buzzes and a woman speaks", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a stream of water runs briefly"], "sample_ids": ["zj2R0XoFr5k", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["airplane, boy, fly", "stream, water, run"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "several insects fly while two men talk"], "sample_ids": ["w2M4i1mklOA", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["alarm, gears, turn", "several, fly, men"], "captions_pred_video": ["footage of an antique clock", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a clock?", "label": 0}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "wind blowing followed by a zoom"], "sample_ids": ["su6FAOcOA8c", "vr8ZXjEBhMQ"], "start_seconds": ["4", "150"], "properties": ["engine, idle, woman", "wind, blow, zoom"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "is taken from a motorcycle's point of view"], 
"captions_pred_audio": ["a woman is speaking and a subway train is moving ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a propeller moves loudly nearby", "people applaud and hoot and chat quietly"], "sample_ids": ["ugHJF0hfYkg", "wwyfGO2J4"], "start_seconds": ["10", "90"], "properties": ["loud, propeller, move", "people, applaud, hoot"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "people are clapping and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a weapon fires multiple times"], "sample_ids": ["ul60S8TXDA8", "sMC07Ucy7kg"], "start_seconds": ["60", "10"], "properties": ["sound, distance, bell", "weapon, fire, multiple"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "footage is from a car's point of view"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["birds chirp as a bell rings", "goats bleat and people speak"], "sample_ids": ["ziUT9IFTkjg", "z5iUE5h0EPs"], "start_seconds": ["10", "30"], "properties": ["chirp, bell, ring", "goats bleat, people speak, language"], "captions_pred_video": [null, "of the goat in the barn"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a goat bleats and a man speaks"], "question": "which entity is speaking a language", "label": 1}, {"captions": ["paper folding and crinkling", "a person is snoring while sleeping"], "sample_ids": ["zPpG3RD8lSs", "vJrjSeP17yE"], "start_seconds": ["20", "40"], "properties": ["paper, fold, crinkle", "a person is sleeping, snoring, person"], "captions_pred_video": ["how to 
make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a person snoring loudly"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a woman speaks with water running", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wTideSjRFS0", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["water, running, woman", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["uKCSGgof8gI", "y8WEcpOlT3I"], "start_seconds": ["12", "40"], "properties": ["chirps, distance, signal", "harsh, wind, blows"], "captions_pred_video": ["footage of a street 
in a small town on a sunny day", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a man is speaking with wind noise in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["s4Uz1Ffgo04", "su6FAOcOA8c"], "start_seconds": ["100", "4"], "properties": ["water, rushes, vehicle", "engine, idle, woman"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "water flows and trickles"], "sample_ids": ["sfAvvZwdLCY", "tB7hWb9gTuQ"], "start_seconds": ["20", "30"], "properties": ["flushes, drains, water", "water, flow, trickle"], "captions_pred_video": ["footage of the toilet in the bathroom", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a toilet is flushed", "water is splashing and gurgling"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a person is whistling", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["sIXTftIuUgw", "tDVADusiIoc"], "start_seconds": ["90", "60"], "properties": ["person, whistling, person", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a person whistling a song", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a gun shoots, followed by water 
sloshing nearby", "a clock ticktocks"], "sample_ids": ["wDVMhEdTiVw", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["gun, shoot, water", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a blurry image of trees and water in the forest", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "a clock ticktocks"], "sample_ids": ["skd2PphS6oI", "v-g-j2uTByM"], "start_seconds": ["190", "30"], "properties": ["ring, bird, vocalize", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a vehicle engine accelerating then running on idle"], "sample_ids": ["zuua6-5goWw", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["birds, chirp, quiet, man, speaks", "engine, accelerate, idle"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "footage of a car driving down the street on a sunny day"], 
"captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "an engine is idling"], "question": "which is quieter", "label": 1}, {"captions": ["a man speaks as a machine runs", "an airplane engine runs"], "sample_ids": ["vD6lYD1l0BY", "yVPZ2MNWpms"], "start_seconds": ["330", "0"], "properties": ["a, machine, run", "engine, airplane, runs"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a car is driving by on the road "], "question": "which machine runs", "label": 1}, {"captions": ["a man is filing a hard object", "an insect buzzes around continuously"], "sample_ids": ["vveS8HT7Uog", "v25l1jef3JY"], "start_seconds": ["100", "0"], "properties": ["a man, hard, object", "buzzes, continuously, insect"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a door opens and closes", "an insect buzzes around continuously"], "sample_ids": ["vBHyYJ8pL0", "v25l1jef3JY"], "start_seconds": ["2", "0"], "properties": ["open, close, door", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a fly is buzzing around a microphone "], "question": "which entity is more likely to be a nuisance", "label": 1}, {"captions": ["water flows as men speak and yell", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vJ7JPEFhyLA", "sSMl2vc3ek"], "start_seconds": ["16", "20"], 
"properties": ["water, flow, men", "loud, multiple, distance"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "paper is crumpling consistently"], "sample_ids": ["xjvTpk2Zpr8", "v5cSxLaHADY"], "start_seconds": ["70", "0"], "properties": ["engine, run, wind", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a jet engine roars and wind blows ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "several insects fly while two men talk"], "sample_ids": ["tQWGZLItBXk", "s-T9OVOiMLo"], "start_seconds": ["170", "330"], "properties": ["voice, music, whoosh", "several, fly, men"], "captions_pred_video": ["worms revolution screenshots", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sZPuqDgX2V0", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["engine, accelerate, intercom", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "an engine is idling"], 
"question": "which entity shows a vehicle engine accelerating then running on idle?", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "wind blows and a vehicle blows a hard then a train blows a horn"], "sample_ids": ["wSVhSdj0F0", "wnpJndXuxLc"], "start_seconds": ["10", "50"], "properties": ["beep, clang, footsteps", "blows, vehicle, train"], "captions_pred_video": [null, "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is a train?", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "a infant makes noise and is excited"], "sample_ids": ["yFB25fqfU8I", "wIJK3-5y0kA"], "start_seconds": ["300", "30"], "properties": ["wave, crash, shoreline", "noise, excited, infant"], "captions_pred_video": ["footage of a person surfing in the ocean", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a infant makes noise and is excited", "a infant makes noise and is excited"], "sample_ids": ["wIJK3-5y0kA", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["noise, excited, infant", "noise, excited, infant"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a baby cries and a woman speaks", "a baby cries and a woman speaks"], "question": "which infant is making noise and is excited", "label": 1}, {"captions": ["water flows as men speak and yell", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vJ7JPEFhyLA", "vYkA3cfXp5Q"], "start_seconds": ["16", "30"], "properties": ["water, 
flow, men", "engine, accelerate, idle"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a dog barks and whimpers", "winds blows roughly as a vehicle races past"], "sample_ids": ["sShpyu2l4YQ", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["barks, whimpers, dog", "wind, blows, vehicle"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a dog is barking and growling", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "a man speaks followed by another man speaking outside"], "sample_ids": ["vf44CgrjT0A", "viuTg1M-dqg"], "start_seconds": ["20", "30"], "properties": ["loud, long, person", "two men, speak, follow"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a loud burp", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["uZesmtKZGSw", "vfYTJq7nU"], "start_seconds": ["250", "130"], "properties": ["men, talk, cars", "rustling, ducks, quack"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", null], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yaln9y8I7ms", "vbZ-0lGPneg"], "start_seconds": ["230", "30"], "properties": ["female, flushes, toilet", "a woman, a television program, a bird"], "captions_pred_video": ["footage is blurry and out of focus", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "vehicles pass by on a roadway"], "sample_ids": ["su6FAOcOA8c", "tgbONvsP47Y"], "start_seconds": ["4", "0"], "properties": ["engine, idle, woman", "pass, vehicle, roadway"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["an engine runs and a man speaks", "a car speeds away loudly followed by a car revving loudly and driving away while outside"], "sample_ids": ["yT5WfYMRr-U", "sjlVMgdGSK0"], "start_seconds": ["30", "30"], "properties": ["engine, run, man", "car, revving, loudly"], "captions_pred_video": ["a man in a red jacket and 
sunglasses driving a boat", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a car accelerates and revs its engine "], "question": "which entity is revving loudly", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "some tunes played by whistling"], "sample_ids": ["wyllXV6PjKo", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["a baby, a woman, a man", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman speaks and a baby cries", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vbr9mHKc8WM", "sSMl2vc3ek"], "start_seconds": ["40", "20"], "properties": ["noise, loudness, engine", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine is idling", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man woman speak while crickets sing", "people applaud and hoot and chat quietly"], "sample_ids": ["zTLVJCo4WEE", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["a, crickets, sing", "people, applaud, hoot"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and crickets chirp", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a man speaks followed by another man speaking outside"], "sample_ids": ["vimzuGQvdcU", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["a, man, yells", "two men, speak, follow"], "captions_pred_video": ["a group of 
people are rafting down a river", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "a dog barks and whimpers"], "sample_ids": ["sQwlkXjQabo", "sShpyu2l4YQ"], "start_seconds": ["10", "0"], "properties": ["liquid, surface, spray", "barks, whimpers, dog"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "the puppies are playing with a toy"], "captions_pred_audio": ["spraying followed by silence", "a dog is barking and growling"], "question": "which entity is a dog", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wjsXBsc7M40", "yajyRTUQk3U"], "start_seconds": ["10", "400"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "a woman, something, fried"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman is speaking while food is frying in the background"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a clock ticktocks in wind", "a man speaks as a car is passing by"], "sample_ids": ["yVumC9TGknc", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, wind", "a, car, pass"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a series of beeps and chirps", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "water flows and trickles"], "sample_ids": ["sxYkFKFIZD0", "tB7hWb9gTuQ"], "start_seconds": ["20", "30"], "properties": ["screech, man, door", "water, flow, trickle"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "wind blowing followed by a zoom"], "sample_ids": ["yYEVLuqEytU", "vr8ZXjEBhMQ"], "start_seconds": ["40", "150"], "properties": ["animal, pig, background", "wind, blow, zoom"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["several sheep bleat and a man speaks", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "females talk and laugh over gusting wind"], "sample_ids": ["w2bYrCVLT60", "un9VQlzgZM"], 
"start_seconds": ["120", "5"], "properties": ["ducks, speak, quack", "females, talk, laugh"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", null], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is more social", "label": 1}, {"captions": ["a baby laugh at a sputter", "a car accelerates and wind blows"], "sample_ids": ["sLUnaPT5gM8", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["laugh, sputter, baby", "accelerates, wind, blows"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["small dogs yip and bark sharply", "a man speaks while turning a water faucet on"], "sample_ids": ["v-wcQf4BDY0", "vf9xf3vMsGM"], "start_seconds": ["120", "540"], "properties": ["bark, yip, sharply", "A man speaks while turning a water faucet on."], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "of the person washing their hands under the faucet"], "captions_pred_audio": ["a dog barks and growls", "a man is speaking while water is running in the background"], "question": "which entity is a human", "label": 1}, {"captions": ["a machine runs continuously", "a machine beeps continuously"], "sample_ids": ["wdXV3Pv0jiY", "y682ml90jGw"], "start_seconds": ["11", "11"], "properties": ["machine, running, continuously", "beeps, machine, continuously"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a beeping sound is being made "], "question": "which machine beeps continuously", "label": 1}, {"captions": ["water flows followed by women 
screaming", "wind blows as people chatter quietly"], "sample_ids": ["w5W5Kqtc8E", "xBxDz0CFVn0"], "start_seconds": ["100", "30"], "properties": ["water, flow, women", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["sjlVMgdGSK0", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["car, revving, loudly", "engine, revs, vehicle"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a race car accelerates and revs its engine "], "question": "which vehicle is revving loudly", "label": 0}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a woman speaks happily and an animal chirps"], "sample_ids": ["uPDn2BFTHk", "uWAAAL4CIoc"], "start_seconds": ["140", "0"], "properties": ["lady, laugh, baby", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman is speaking and a dog is barking "], "question": "which entity has a baby?", "label": 0}, {"captions": ["a woman sneezes then speaks", "several ducks quack and cocks crow far away"], "sample_ids": ["x4dZyf9Gbj0", "sNB8zxXneIM"], "start_seconds": ["130", "20"], "properties": ["sneezes, speaks, woman", "several, quack, cocks"], "captions_pred_video": ["footage is blurry and out of focus", "a group of geese in a cage"], "captions_pred_audio": ["a woman sneezes and speaks", "a rooster is crowing and wind is blowing "], "question": "which entity is a 
bird", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a telephone rings followed by a woman talking"], "sample_ids": ["vdoxuJn9lTc", "tGcFnX0GHI"], "start_seconds": ["40", "0"], "properties": ["burp, loud, girl", "ring, talk, woman"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", null], "captions_pred_audio": ["a child speaks followed by a burp", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a stream of water runs briefly", "pigeons vocalize and birds chirp"], "sample_ids": ["x-PeY8Yb8M4", "uiS58TNyUiw"], "start_seconds": ["300", "430"], "properties": ["stream, water, run", "vocalize, bird, chirp"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "of the pigeon in the cage"], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a stream of water?", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "some men converse over an engine running"], "sample_ids": ["xjhAnI2q6hM", "sCiy7QS1U"], "start_seconds": ["6", "300"], "properties": ["engine revs, vehicle, people", "men, converse, engine"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", null], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows people talking to each other?", "label": 1}, {"captions": ["a person speaks over rustling leaves", "a man speaks followed by another man speaking outside"], "sample_ids": ["zOZleIRqZm4", "viuTg1M-dqg"], "start_seconds": ["80", "30"], "properties": ["rustling, leaves, person", "two men, speak, follow"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of water coming out 
of a hole in the ground"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yZmhM1HcsyE", "tDVADusiIoc"], "start_seconds": ["4", "60"], "properties": ["engine, roar, water", "water, radio, man"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["sfAvvZwdLCY", "yDoT73BWsdA"], "start_seconds": ["20", "10"], "properties": ["water drains, flushes, water", "engine, revs, vehicle"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a toilet is flushed", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a man talks while vehicles pass by", "several insects fly while two men talk"], "sample_ids": ["sK4u5T8hW78", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["a, man, talk", "several, fly, men"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more vehicles passing by", "label": 0}, {"captions": ["a child speaks in closed space", "paper is crumpling consistently"], "sample_ids": ["yW6FWLSLkx4", "v5cSxLaHADY"], "start_seconds": ["40", "0"], "properties": ["child, space, speak", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a group of people chatter and talk as multiple horns honk in the background"], "sample_ids": ["sNB8zxXneIM", "yLy-WycbVVE"], "start_seconds": ["20", "30"], "properties": ["several, quack, cocks", "background, people, talk"], "captions_pred_video": ["a group of geese in a cage", "a soccer field in a stadium with yellow and red seats"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a man is speaking and a church bell is ringing with wind noise in the background "], "question": "which entity is talking", "label": 1}, {"captions": ["a infant makes noise and is excited", "someone is typing on a computer keyboard"], "sample_ids": ["wIJK3-5y0kA", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["noise, excited, infant", "keyboard, type, computer"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "how to make money on youtube in spanish"], 
"captions_pred_audio": ["a baby cries and a woman speaks", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["an engine runs and a man speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["yT5WfYMRr-U", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["engine, run, man", "engine, idle, woman"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity has a man speaking while an engine runs?", "label": 0}, {"captions": ["a woman speaks with water running", "paper is crumpling consistently"], "sample_ids": ["wTideSjRFS0", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["water, running, woman", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["zFjIWfSD-4", "uYT5gxnyMWM"], "start_seconds": ["410", "50"], "properties": ["People, motor, brakes", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a woman and man speak while food is frying", "multiple birds vocalize and wind 
blows"], "sample_ids": ["zk-xJGQU8-4", "uoGVs9yUqY4"], "start_seconds": ["130", "30"], "properties": ["food, man, woman", "multiple, vocalize, wind"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "for how to make a wooden shed door youtube"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "birds are chirping and flapping their wings with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vf9xf3vMsGM", "tiDFTC-5vU"], "start_seconds": ["540", "30"], "properties": ["A man speaks while turning a water faucet on.", "male, duck, laugh"], "captions_pred_video": ["of the person washing their hands under the faucet", null], "captions_pred_audio": ["a man is speaking while water is running in the background", "a man is speaking and ducks are quacking"], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a man speaks over intermittent keyboard taps", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tw76HGONaKg", "tiDFTC-5vU"], "start_seconds": ["570", "30"], "properties": ["audio, man, keyboard", "male, duck, laugh"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, 
the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck in it?", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "small dogs yip and bark sharply"], "sample_ids": ["x9JovgqUcs", "v-wcQf4BDY0"], "start_seconds": ["500", "120"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "a duck quacks continuously"], "sample_ids": ["w5W5Kqtc8E", "vh30P49Po6s"], "start_seconds": ["100", "30"], "properties": ["wind, blow, vehicle", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "someone whistles a tune"], "sample_ids": ["v0x1odnXtP0", "sIXTftIuUgw"], "start_seconds": ["210", "90"], "properties": ["keyboard, type, computer", "someone, tune, whistle"], "captions_pred_video": ["how to make money on youtube in spanish", null], "captions_pred_audio": ["a person is typing on a keyboard", "a person whistling a song"], "question": "which is a musical instrument", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a car accelerates and wind blows"], "sample_ids": 
["tOSWIURC-4", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["engine, work, nearby", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a lawn mower is running ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "someone is typing on a computer keyboard"], "sample_ids": ["zofjfKhqLk8", "v0x1odnXtP0"], "start_seconds": ["10", "210"], "properties": ["background, metal, clank", "keyboard, type, computer"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "how to make money on youtube in spanish"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a person is typing on a keyboard"], "question": "which is not a machine", "label": 1}, {"captions": ["a clock ticktocks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["v-g-j2uTByM", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["ticktocks, clock, ticktocks", "men, talk, cars"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a clock is ticking loudly", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["bird 
squawks are accompanied by a man and woman speaking", "someone is typing on a computer keyboard"], "sample_ids": ["wqZ135Ssz0", "v0x1odnXtP0"], "start_seconds": ["60", "210"], "properties": ["man, woman, squawks", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a person is typing on a keyboard"], "question": "which is not a type of keyboard", "label": 0}, {"captions": ["someone is burping continuously", "a duck quacks continuously"], "sample_ids": ["y636gklDioE", "vh30P49Po6s"], "start_seconds": ["20", "30"], "properties": ["burps, burps, burps", "quacks, continuously, duck"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person burps loudly several times", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["goats bleat and people speak", "a woman speaks as she rubs two objects together"], "sample_ids": ["z5iUE5h0EPs", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["goats bleat, people speak, language", "two objects, woman, speak"], "captions_pred_video": ["of the goat in the barn", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to 
make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a goat bleats and a man speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "dishes cling together then a man begins to speak"], "sample_ids": ["wRBHTgrbiwg", "sQGXqGcwOTc"], "start_seconds": ["50", "3"], "properties": ["bird, owl, speak", "cling, speak, dishes"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking briefly?", "label": 0}, {"captions": ["three men talk while wind blows and some liquid flows", "a car speeding up in the distance"], "sample_ids": ["vJ7JPEFhyLA", "u0TrcHhkPQ"], "start_seconds": ["16", "20"], "properties": ["three men, wind, flow", "distance, car, speed"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["continuous snoring", "women speak as water runs briefly, children call out, and a man speaks"], "sample_ids": ["sLkeqCDJIyw", "uRExseg-0XI"], "start_seconds": ["120", "210"], "properties": ["loud, snoring, noise", "woman, man, water"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking while water is running and birds are chirping "], "question": "which entity is quieter", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "an engine runs loudly"], "sample_ids": ["xyL9F5VrjkE", "vqZuVbG6-HI"], "start_seconds": ["20", "130"], "properties": ["engine, run, wind", "loud, engine, run"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage is blurry because it's raining outside"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a lawn mower is running and men are speaking "], "question": "which entity is running", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["w2M4i1mklOA", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["loud, chime, bell", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of an antique clock", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a train blows its whistle and blows its horn "], "question": "which entity is quieter", "label": 1}, {"captions": ["rustling with distant murmuring", "a stream of water runs briefly"], "sample_ids": ["wnNNcxAPwGQ", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["sound, distance, rustling", "stream, water, run"], "captions_pred_video": ["footage of a yellow truck doing a burnout on a race track", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a crowd of people are talking and laughing while a skateboard rolls by ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": 
["water rushes and then a vehicle zooms past", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["s4Uz1Ffgo04", "zj2R0XoFr5k"], "start_seconds": ["100", "50"], "properties": ["water, rushes, vehicle", "airplane, boy, fly"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "a man speaks as a vehicles passes by then a woman speaks"], "sample_ids": ["uKCSGgof8gI", "siJFXfGWgDk"], "start_seconds": ["12", "50"], "properties": ["chirps, distance, signal", "man, woman, vehicle"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a man is speaking and birds are chirping in the background "], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "someone snores nearby"], "sample_ids": ["yLy-WycbVVE", "spJCm8tD9Zo"], "start_seconds": ["30", "90"], "properties": ["background, people, talk", "someone snores, nearby, someone"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a person is snoring loudly"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "water flows as men speak and yell"], 
"sample_ids": ["slZLHwNbbt4", "vJ7JPEFhyLA"], "start_seconds": ["300", "16"], "properties": ["clap, distance, horn", "water, flow, men"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing?", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "wind blows in gusts as a woman speaks in the distance"], "sample_ids": ["x6ijhqRY38s", "uC9dtII1KDI"], "start_seconds": ["250", "150"], "properties": ["bowl, silverware, man", "wind, gusts, distance"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage of a person riding a horse in a riding arena"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a woman is speaking with wind noise and breathing in the background "], "question": "which entity is more likely to be in a bowl", "label": 0}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a clock ticktocks"], "sample_ids": ["zkKdxzNC97Y", "v-g-j2uTByM"], "start_seconds": ["27", "30"], "properties": ["loud, bang, noise", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a door is opened and closed", "a clock is ticking loudly"], "question": "which entity is silent", "label": 0}, {"captions": ["a large bell chimes back and forth loudly", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["w2M4i1mklOA", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["loud, chime, bell", "a woman, something, fried"], "captions_pred_video": ["footage of 
an antique clock", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["a person sniffles and sneezes", "paper is crumpling consistently"], "sample_ids": ["uRlbY6aoBU", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["sneezes, sniffles, person", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is sneezing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["zkKdxzNC97Y", "vbZ-0lGPneg"], "start_seconds": ["27", "30"], "properties": ["hard, surface, door", "a woman, a television program, a bird"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a door is opened and closed", "a woman is speaking and a dog is whimpering"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["continuous snoring", "a clicking followed by some people laughing and a kid speaking"], "sample_ids": ["sLkeqCDJIyw", "vz8868znkVQ"], "start_seconds": ["120", "60"], "properties": ["loud, snoring, noise", "audio, click, kid speaking"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "a video of a plane flying over a cloudy sky"], "captions_pred_audio": ["a person is snoring loudly", "a baby is laughing and breathing with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "water flows as men speak and yell"], "sample_ids": ["yJ0TePmaOo", "vJ7JPEFhyLA"], "start_seconds": ["390", "16"], "properties": ["two hard objects, man, speak", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing?", "label": 1}, {"captions": ["a toilet flushes and water drains", "some men converse over an engine running"], "sample_ids": ["sfAvvZwdLCY", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["water drains, flushes, water", "men, converse, engine"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["food is frying then a woman speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["ukxt9I7eMMg", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["food, woman, speak", "cling, speak, dishes"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "mechanisms are operating and water is splashing "], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["some liquid flows while a woman laughs and man talks", "a baby cries and a woman speaks"], "sample_ids": ["vddP56-ogds", "tMbMDvT50j8"], 
"start_seconds": ["30", "12"], "properties": ["liquid, laughs, man", "a, cry, woman"], "captions_pred_video": [null, "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a baby cries and a woman speaks"], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a telephone rings followed by a woman talking"], "sample_ids": ["vh30P49Po6s", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["loud, continuous, quacks", "ring, talk, woman"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a dial tone sounds followed by a woman speaking"], "question": "which entity is quieter", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["xyx6eNVEYRY", "su6FAOcOA8c"], "start_seconds": ["380", "4"], "properties": ["loud, engine, muffles", "engine, idle, woman"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a woman is speaking and a subway train is moving "], "question": "which engine is quieter", "label": 1}, {"captions": ["a man speaks as a car is passing by", "loud ringing of a telephone stops followed by a man speaking and a digital beep"], "sample_ids": ["sK4u5T8hW78", "uzQnlJXBbOM"], "start_seconds": ["30", "50"], "properties": ["a, car, pass", "ringing, beep, stop"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of a person using a cell phone on a table"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a telephone rings and a man speaks"], "question": "which entity is about a car passing by?", "label": 0}, {"captions": ["people speak softly as food sizzles", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["yhQ2Lg-7qDY", "x9JovgqUcs"], "start_seconds": ["130", "500"], "properties": ["food, sizzle, speak", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a man speaks and types on a keyboard"], "question": "which entity is speaking", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a horn rings out as a machine runs by"], "sample_ids": ["s4Uz1Ffgo04", "slZLHwNbbt4"], "start_seconds": ["100", "300"], "properties": ["roars, background, people speaking", "a, horn, run"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is quieter", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "a horn rings out as a machine runs by"], "sample_ids": ["wnpJndXuxLc", "slZLHwNbbt4"], "start_seconds": ["50", "300"], "properties": ["beeps, loud, whistle", "a, horn, run"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of a train coming down 
the tracks on a sunny day"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vW4x7S1VfQc", "y8WEcpOlT3I"], "start_seconds": ["150", "40"], "properties": ["clacking, oil, woman", "harsh, wind, blows"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "on how to use a sewing machine youtube"], "captions_pred_audio": ["food sizzles in a frying pan", "a man is speaking with wind noise in the background "], "question": "which entity is a man speaking to another man?", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["w34HjHr6gAY", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["beeps, hit, woman", "music, gunfire, explosion"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a saw finishes running as metal clings in the 
background", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["zofjfKhqLk8", "yDoT73BWsdA"], "start_seconds": ["10", "10"], "properties": ["background, metal, clings", "engine, revs, vehicle"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["food is frying and sizzles", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["zNRChLjqcU", "y8WEcpOlT3I"], "start_seconds": ["220", "40"], "properties": ["food is frying, sizzles, food", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking with wind noise in the background "], "question": "which entity is not a person?", "label": 0}, {"captions": ["animals bleat and cry out and then a woman speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yZp6xizR0yU", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["animal, bleat, cry", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a woman is speaking and a dog is whimpering"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["male speech with light ticking", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xO-Q2BlIIPU", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["male, speech, ticking", "water, radio, man"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 
2016", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "insects humming with a dog barking and small goat bleating"], "sample_ids": ["t25U-v4k4ts", "tIY7qOV3rEM"], "start_seconds": ["40", "0"], "properties": ["a, chirps, bird", "animal, bark, dog, barking, small, goat, bleating"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "a dog is standing in the middle of a dirt road in the woods"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a dog is barking and a cat is meowing"], "question": "which entity has more animals", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "an infant crying as a woman laughs"], "sample_ids": ["vYkA3cfXp5Q", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["speed, idle, accelerate", "a, laugh, infant"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["an engine is idling", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "a man speaks as a car is passing by"], "sample_ids": ["sHbXC6na9hg", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["a person, saw, wood", "a, car, pass"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a person cutting wood?", "label": 0}, {"captions": ["an engine works in idle nearby followed by a man talking", "a man speaks as a car is passing by"], "sample_ids": ["wqADXCzngMw", "sK4u5T8hW78"], "start_seconds": ["340", "30"], "properties": ["engine, idle, man", "a, car, pass"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a helicopter engine runs", "women speak as water runs briefly, children call out, and a man speaks"], "sample_ids": ["t5ZbXbniOWk", "uRExseg-0XI"], "start_seconds": ["30", "210"], "properties": ["engine, helicopter, run", "woman, man, water"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking while water is running and birds are chirping "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["food is frying then a 
woman speaks", "a child speaks in closed space"], "sample_ids": ["ukxt9I7eMMg", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["food, woman, speak", "child, space, speak"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a child speaking in a closed space?", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a mechanical buzzing getting louder"], "sample_ids": ["v0x1odnXtP0", "sEprKHm8Sj8"], "start_seconds": ["210", "90"], "properties": ["keyboard, type, computer", "noise, loud, buzzing"], "captions_pred_video": ["how to make money on youtube in spanish", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["a person is typing on a keyboard", "a race car accelerates and revs its engine "], "question": "which is a noise", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zFjIWfSD-4", "wz7N8YRy74I"], "start_seconds": ["410", "30"], "properties": ["People, motor, brakes", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "paper folding 
and crinkling"], "sample_ids": ["x6ijhqRY38s", "zPpG3RD8lSs"], "start_seconds": ["250", "20"], "properties": ["bowl, silverware, man", "paper, fold, crinkle"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "the wind blows and a mouse clicks "], "question": "which is not a person", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wyllXV6PjKo", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["a kid, talk, cry", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["someone snores nearby", "a crowd yells, reacts and applauds"], "sample_ids": ["spJCm8tD9Zo", "wztCSUxOf8"], "start_seconds": ["90", "130"], "properties": ["someone snores, nearby, someone", "a 
crowd, yells, applauds"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "a baby laugh at a sputter"], "sample_ids": ["vSeGhaZt-aI", "sLUnaPT5gM8"], "start_seconds": ["50", "0"], "properties": ["water, bubbles, run", "laugh, sputter, baby"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is a baby?", "label": 1}, {"captions": ["a person is whistling a tune", "a stream of water flows as people talk and wind blows"], "sample_ids": ["scYRUkrFLiQ", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["a, tune, whistle", "stream, water, flow"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "footage is blurry and out of focus"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["an airplane engine spools and people speak", "wind blowing followed by a zoom"], "sample_ids": ["wTjoRj1se3U", "vr8ZXjEBhMQ"], "start_seconds": ["390", "150"], "properties": ["airplane, engine, spool", "wind, blow, zoom"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a jet engine is running and people are talking", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the 
distance", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["uC9dtII1KDI", "wz7N8YRy74I"], "start_seconds": ["150", "30"], "properties": ["wind, gusts, distance", "rooster, crow, background, men"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in it?", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "a frog vocalizes as birds chirp"], "sample_ids": ["xyL9F5VrjkE", "wqUmIEzuNz4"], "start_seconds": ["20", "30"], "properties": ["engine, run, wind", "frog, bird, vocalize"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "a frog sitting in the grass on a sunny day"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a cat meows and rustles"], "question": "which entity is not a vehicle", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "birds chirp and objects are moved around"], "sample_ids": ["wvKpEYswXO0", "yPUYU6t3rwo"], "start_seconds": ["150", "370"], "properties": ["water, tap, run", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "insects buzz and a man speaks"], "question": "which entity is about moving objects around?", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "plastic is tapped on while someone speaks"], "sample_ids": ["uPDn2BFTHk", "wvKpEYswXO0"], "start_seconds": ["140", "150"], "properties": ["woman, laughs, speaks", "plastic, 
tap, speak"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a man talks as several small engines run", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["u9A6VZQCZpU", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["a, man, talk", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "wind blowing followed by a zoom"], "sample_ids": ["wRV8yMk886E", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["liquid, spray, nozzle", "wind, blow, zoom"], "captions_pred_video": ["two cars are parked in a parking lot at night", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man speaks followed by a loud burst", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["people speak then an engine runs", "a stream of water flows as people talk and wind blows"], "sample_ids": ["uMTTDZ2mb4", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["engine, run, people", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a man speaks as a car is passing by"], "sample_ids": 
["zofjfKhqLk8", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["background, metal, clings", "a, car, pass"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["an engine runs and wind blows", "a dark barks and whimpers"], "sample_ids": ["vs65y4qmyBE", "sYj4hpDUZDQ"], "start_seconds": ["340", "30"], "properties": ["engine, run, wind", "barks, whimpers, dark"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a brown and white dog standing in front of a wall with its mouth open"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a dog barks and a cat meows"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "an engine runs loudly"], "sample_ids": ["shmR4OZtzqA", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["man, engine, idle", "loud, engine, run"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb 
on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man speaks while a motor runs", "a lawn mower is running and men are speaking "], "question": "which engine is running loudly", "label": 1}, {"captions": ["speaking following by laughing and clapping", "an insect buzzes around continuously"], "sample_ids": ["u2f5NpsoHBg", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["person, laugh, clap", "buzzes, continuously, insect"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a fly is buzzing around a microphone "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a child yells and another yells", "a man speaks as a motor runs in the background"], "sample_ids": ["vMDHu7Lxcgw", "xZepNM9qcRA"], "start_seconds": ["410", "30"], "properties": ["two, yell, child", "background, motor, run"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a woman and man are speaking", "a man speaks as a vehicles passes by then a woman speaks"], "sample_ids": ["vbpKkWvfOu4", "siJFXfGWgDk"], "start_seconds": ["560", "50"], "properties": ["two people, speaking, woman, man", "man, woman, vehicle"], "captions_pred_video": ["2012-07-20 17 30 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking and birds are chirping in the background "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "wind blows as people chatter quietly"], "sample_ids": ["skd2PphS6oI", "xBxDz0CFVn0"], "start_seconds": ["190", "30"], "properties": ["ring, bird, vocalize", "wind, chatter, people"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "footage is blurry and out of focus"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a kid speaks followed by music playing"], "sample_ids": ["xC8kbrKJmco", "tQWGZLItBXk"], "start_seconds": ["0", "170"], "properties": ["background, goat, scream", "music, kid, speak"], "captions_pred_video": [null, "worms revolution screenshots"], "captions_pred_audio": ["a goat is bleating ", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a door slams shut roughly", "a man speaks as a car is passing by"], "sample_ids": ["zkKdxzNC97Y", "sK4u5T8hW78"], "start_seconds": ["27", "30"], "properties": ["a door, slams, shut", "a, car, pass"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a beep repeats multiple times", "a man speaks with another voice speaking in the background"], "sample_ids": ["y682ml90jGw", "u21-Z5gJCB8"], "start_seconds": ["11", "30"], "properties": ["beep, repeat, multiple", "background, voice, man"], "captions_pred_video": [null, "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["a beeping sound is being made ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a single voice speaking in the background?", "label": 0}, {"captions": ["a motorcycle engine is idling", "an airplane engine spools and people speak"], "sample_ids": ["vZAqdHZ81yA", "wTjoRj1se3U"], "start_seconds": ["180", "390"], "properties": ["engine, motorcycle, idling", "airplane, engine, spool"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["an engine is idling loudly", "a jet engine is running and people are talking"], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a person sniffs and sneezes", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["uRlbY6aoBU", "tDVADusiIoc"], "start_seconds": ["0", "60"], "properties": ["sneezes, person, sniffs", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking while 
the wind is blowing and water is splashing"], "question": "which entity is about a person speaking over a radio?", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "paper folding and crinkling"], "sample_ids": ["xKB8O8LTs6s", "zPpG3RD8lSs"], "start_seconds": ["70", "20"], "properties": ["music, gunfire, explosion", "paper, fold, crinkle"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "the wind blows and a mouse clicks "], "question": "which entity is more likely to be a movie", "label": 0}, {"captions": ["a drill runs and two people laugh", "someone is typing on a computer keyboard"], "sample_ids": ["tEE3MpBt1sg", "v0x1odnXtP0"], "start_seconds": ["50", "210"], "properties": ["two people, laugh, drill", "keyboard, type, computer"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "how to make money on youtube in spanish"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a person is typing on a keyboard"], 
"question": "which is not a drill", "label": 1}, {"captions": ["an adult woman and an adult man speak", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zTLVJCo4WEE", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["two people, adult, speak", "rooster, crow, background, men"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["water splashes and a door squeaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["sdXV-ylviw", "vzxHnu-SFEw"], "start_seconds": ["190", "80"], "properties": ["sound, splash, door", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity has a 
door?", "label": 0}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "small dogs yip and bark sharply"], "sample_ids": ["tDVADusiIoc", "v-wcQf4BDY0"], "start_seconds": ["60", "120"], "properties": ["water, radio, man", "bark, yip, sharply"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a drill runs and two people laugh", "water flows as men speak and yell"], "sample_ids": ["tEE3MpBt1sg", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["two people, laugh, drill", "water, flow, men"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a drill running and two people laughing?", "label": 0}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a person is burping while a girl speaks"], "sample_ids": ["tDVADusiIoc", "vdoxuJn9lTc"], "start_seconds": ["60", "40"], "properties": ["wind, radio, waves", "person, burp, girl"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a child speaks followed by a burp"], "question": "which entity is a person speaking over a radio?", "label": 0}, {"captions": ["continuous snoring", "leaves rustling followed by a small bell chiming as birds chirp in the background"], 
"sample_ids": ["sLkeqCDJIyw", "ziUT9IFTkjg"], "start_seconds": ["120", "10"], "properties": ["loud, snoring, noise", "background, birds, rustling"], "captions_pred_video": [", what is the man doing on the couch? sleeping", null], "captions_pred_audio": ["a person is snoring loudly", "birds are chirping and a chime is ringing "], "question": "which noise is quieter", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["w34HjHr6gAY", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["beeps, hit, woman", "People, motor, brakes"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vYkA3cfXp5Q", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["engine, accelerate, idle", "music, gunfire, explosion"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["an engine is idling", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", 
"label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "race cars go around a track as a man commentates"], "sample_ids": ["y8WEcpOlT3I", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["harsh, wind, blows", "car, track, man"], "captions_pred_video": ["on how to use a sewing machine youtube", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about cars going around a track?", "label": 1}, {"captions": ["a person is snoring while sleeping", "vehicles pass by on a roadway"], "sample_ids": ["vJrjSeP17yE", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["a person is sleeping, snoring, person", "pass, vehicle, roadway"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a person snoring loudly", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a toilet flushes and water drains", "someone is typing on a computer keyboard"], "sample_ids": ["sfAvvZwdLCY", "v0x1odnXtP0"], "start_seconds": ["20", "210"], "properties": ["water drains, flushes, water", "keyboard, type, computer"], "captions_pred_video": ["footage of the toilet in the bathroom", "how to make money on youtube in spanish"], 
"captions_pred_audio": ["a toilet is flushed", "a person is typing on a keyboard"], "question": "which object is used to type on a computer", "label": 1}, {"captions": ["a man speaks as a machine runs", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vD6lYD1l0BY", "w5W5Kqtc8E"], "start_seconds": ["330", "100"], "properties": ["a, machine, run", "wind, blow, vehicle"], "captions_pred_video": ["game controller being held in the hands of the person", null], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a man speaking as a machine runs?", "label": 0}, {"captions": ["a baby cries and wails as an adult female speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["zliInBdC98Y", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["a, baby, cries, wails", "people, applaud, hoot"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", null], "captions_pred_audio": ["a baby cries and a woman speaks", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motorcycle engine is idling", "a person is burping while a girl speaks"], "sample_ids": ["vZAqdHZ81yA", "vdoxuJn9lTc"], "start_seconds": ["180", "40"], "properties": ["engine, motorcycle, idling", "person, burp, girl"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["an engine is idling loudly", "a child speaks followed by a burp"], "question": "which entity is not a person?", "label": 0}, {"captions": ["an infant crying frantically", "some men converse over an engine running"], "sample_ids": ["zwOBqeFTgiU", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["cry, infant, 
frantically", "men, converse, engine"], "captions_pred_video": ["of the baby crying in the car seat", null], "captions_pred_audio": ["a baby cries loudly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "a woman speaks as she rubs two objects together"], "sample_ids": ["vbpKkWvfOu4", "vzxHnu-SFEw"], "start_seconds": ["560", "80"], "properties": ["a, woman, man", "two objects, woman, speak"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which woman is speaking", "label": 1}, {"captions": ["an adult woman and an adult man speak", "an audience gives applause as a man yells and a group 
sings"], "sample_ids": ["zTLVJCo4WEE", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["two people, adult, speak", "applause, audience, yells"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["tDlysoZiA1I", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["animal, grunts, chirps", "animal, grunts, snorts"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking and a baby is crying"], "question": "which entity has more grunts", "label": 1}, {"captions": ["a toilet flushes and water drains", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sfAvvZwdLCY", "wz7N8YRy74I"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "rooster, crow, background, men"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "water is sprayed across a hard surface"], "sample_ids": ["uC9dtII1KDI", "sQwlkXjQabo"], "start_seconds": ["150", "10"], "properties": ["wind, gusts, distance", "water, spray, surface"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "a close-up of 
a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["siJFXfGWgDk", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["man, woman, vehicle", "music, gunfire, explosion"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more action", "label": 1}, {"captions": ["a person is snoring while sleeping", "continuous snoring"], "sample_ids": ["vJrjSeP17yE", "sLkeqCDJIyw"], "start_seconds": ["40", "120"], "properties": ["a person is sleeping, snoring, person", "loud, snoring, noise"], "captions_pred_video": ["a black background with a small plane flying in the sky", ", what is the man doing on the couch? 
sleeping"], "captions_pred_audio": ["a person snoring loudly", "a person is snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows and a stream of water flows nearby", "some men converse over an engine running"], "sample_ids": ["sYITalLZjj4", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["stream, flow, wind", "men, converse, engine"], "captions_pred_video": ["two ducks are swimming in the water near each other", null], "captions_pred_audio": ["wind blows and birds chirp", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a man talks while vehicles pass by", "people applaud and hoot and chat quietly"], "sample_ids": ["sK4u5T8hW78", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["a, man, talk", "people, applaud, hoot"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "a duck quacks loudly and continuously"], "sample_ids": ["vfYTJq7nU", "vh30P49Po6s"], "start_seconds": ["130", "30"], "properties": ["ducks, quack, man", "loud, continuous, quacks"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a duck is quacking loudly"], "question": "which duck is quacking 
loudly", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "paper is crumpling consistently"], "sample_ids": ["wz7N8YRy74I", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["rooster, crow, background, people", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["w2M4i1mklOA", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["alarm, gears, turn", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of an antique clock", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["wvKpEYswXO0", "yDoT73BWsdA"], "start_seconds": ["150", "10"], "properties": ["water, tap, run", "engine, revs, vehicle"], "captions_pred_video": ["of the person preparing food in the kitchen", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a race car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a clock ticktocks", "a stream of water flows quickly"], "sample_ids": ["v-g-j2uTByM", "wbHTKEJZyhc"], 
"start_seconds": ["30", "20"], "properties": ["ticktocks, clock, ticktocks", "stream, water, flow"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["a clock is ticking loudly", "a waterfall is flowing and people are speaking "], "question": "which entity is moving", "label": 1}, {"captions": ["a heavy rain falls endlessly", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wP8ZKrlx3oA", "xfaoyyzw2WU"], "start_seconds": ["40", "180"], "properties": ["heavy, rain, fall", "loud, jet engine, roar"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a heavy rain is falling on a surface", "an aircraft engine roars and a man speaks "], "question": 
"which is louder", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["y2ZBGpgbhHM", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["birds, tweet, pant", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds chirping and a dog panting", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 0}, {"captions": ["a vehicle engine runs and someone speaks", "an infant crying as a woman laughs"], "sample_ids": ["zF8yoL0rkbI", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["engine, run, someone", "a, laugh, infant"], "captions_pred_video": ["footage of the traffic on the street at night", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["wSVhSdj0F0", "vbZ-0lGPneg"], "start_seconds": ["10", "30"], "properties": ["horn honks, keys jingle, electronic beep", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "a man speaks as a car is passing by"], "sample_ids": ["t97k0cejSQE", "sK4u5T8hW78"], "start_seconds": ["250", "30"], "properties": ["bird, chirp, insect", "a, car, pass"], "captions_pred_video": ["a bee on a purple thistle flower", "for 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a moving object", "label": 1}, {"captions": ["birds vocalize and a man speaks", "a man speaks with another voice speaking in the background"], "sample_ids": ["v0wPrLBI3hg", "u21-Z5gJCB8"], "start_seconds": ["30", "30"], "properties": ["vocalize, bird, speak", "background, voice, man"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking to birds?", "label": 0}, {"captions": ["an engine runs and wind blows", "wind blows as people chatter quietly"], "sample_ids": ["vs65y4qmyBE", "xBxDz0CFVn0"], "start_seconds": ["340", "30"], "properties": ["engine, run, wind", "wind, chatter, people"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage is blurry and out of focus"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "people applaud and hoot and chat quietly"], "sample_ids": ["vXlk0lIQBFo", "wwyfGO2J4"], "start_seconds": ["470", "90"], "properties": ["wind, talk, vocalize", "people, applaud, hoot"], "captions_pred_video": ["- a woman 
and two donkeys in a fenced in area", null], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "winds blows roughly as a vehicle races past"], "sample_ids": ["vJvryTwuAV8", "xjvTpk2Zpr8"], "start_seconds": ["16", "70"], "properties": ["audience, cheer, man", "wind, blows, vehicle"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "a guy speaks with birds chirping in the background"], "sample_ids": ["vBslzh7saPw", "v5P-ThUCINM"], "start_seconds": ["90", "400"], "properties": ["power, scream, increase", "background, chirp, bird"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking and birds are chirping"], "question": "which entity is quieter", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "wind blows as people chatter quietly"], "sample_ids": ["xyL9F5VrjkE", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["wind, blows, vehicle", "wind, chatter, people"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage is blurry and out of focus"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", 
"a vehicle engine accelerating then running on idle"], "sample_ids": ["uiItxDsDMFI", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["wood, piece, saw", "engine, accelerate, idle"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a saw is being used with background noise ", "an engine is idling"], "question": "which entity is a machine", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "water flows as men speak and yell"], "sample_ids": ["yI-KvObbDoY", "vJ7JPEFhyLA"], "start_seconds": ["260", "16"], "properties": ["sound, smack, wind", "water, flow, men"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of water flowing?", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["w0xsN8X18Y", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["music, surface, rain", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a duck quacks and a woman speaks"], "question": "which entity is about ducks?", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "a horn rings out as a machine runs by"], "sample_ids": ["s4Uz1Ffgo04", "slZLHwNbbt4"], "start_seconds": ["100", "300"], "properties": ["water, rushes, motorcycle", "a, horn, run"], "captions_pred_video": ["footage of an ambulance 
arriving at the scene of an accident", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["t25U-v4k4ts", "vbZ-0lGPneg"], "start_seconds": ["40", "30"], "properties": ["a, chirps, bird", "a woman, a television program, a bird"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird that chirps?", "label": 0}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["zofjfKhqLk8", "vbZ-0lGPneg"], "start_seconds": ["10", "30"], "properties": ["background, metal, clank", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird in it?", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "people applaud and hoot and chat quietly"], "sample_ids": ["zY3icUyMdh8", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["dog, bark, engine", "people, applaud, hoot"], "captions_pred_video": ["footage of a bus driving through a residential street at night", null], "captions_pred_audio": ["a car is driving and dogs are 
barking and squealing ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person speaks briefly", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["zOZleIRqZm4", "uYT5gxnyMWM"], "start_seconds": ["80", "50"], "properties": ["person, talk, brief", "a, scream, girl"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a recording of a person talking?", "label": 0}, {"captions": ["a man rubs two objects together then speaks", "a car accelerates and wind blows"], "sample_ids": ["vveS8HT7Uog", "u0TrcHhkPQ"], "start_seconds": ["100", "20"], "properties": ["a man, objects, speak", "accelerates, wind, blows"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a race car accelerates and revs its engine "], "question": "which object is moving", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["skd2PphS6oI", "zl9Dqx-j7q4"], "start_seconds": ["190", "6"], "properties": ["ring, bird, vocalize", "engine, laugh, loud"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vYkA3cfXp5Q", 
"yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["engine, accelerate, idle", "engine, revs, vehicle"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["an engine is idling", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle engine", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sHbXC6na9hg", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["a person, saw, wood", "rooster, crow, background, men"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is about a person cutting wood?", "label": 0}, {"captions": ["goats bleat and metal clings", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tH17JPjDPnc", "uYT5gxnyMWM"], "start_seconds": ["260", "50"], "properties": ["bleat, metal, clings", "female, spraying, scream"], "captions_pred_video": ["feed of the goats eating hay in the barn", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a person speaks briefly", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["zOZleIRqZm4", "vbZ-0lGPneg"], "start_seconds": ["80", "30"], "properties": ["person, talk, brief", "a woman, a television program, a bird"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of a man holding a baby duck in his 
hands"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a dog is whimpering"], "question": "which entity has more people talking", "label": 1}, {"captions": ["a male speaks and another male speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["viuTg1M-dqg", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["two males, speaking, male", "wind, blows, vehicle"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a helicopter engine runs continuously", "a duck quacks loudly and continuously"], "sample_ids": ["ugHJF0hfYkg", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["engine, running, continuously", "loud, continuous, quacks"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a helicopter is flying overhead ", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a clock ticktocks in wind", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yVumC9TGknc", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["ticktocks, clock, wind", "engine, laugh, loud"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a series of beeps and chirps", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "a car accelerates and wind blows"], "sample_ids": ["w34HjHr6gAY", "u0TrcHhkPQ"], 
"start_seconds": ["30", "20"], "properties": ["beeps, squawk, child speaking", "accelerates, wind, blows"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zkKdxzNC97Y", "yajyRTUQk3U"], "start_seconds": ["27", "400"], "properties": ["hard, surface, door", "a woman, something, fried"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a door is opened and closed", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["tIY7qOV3rEM", "ziUT9IFTkjg"], "start_seconds": ["0", "10"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "background, birds, rustling"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "birds are chirping and a chime is ringing "], "question": "which entity has a background of birds 
chirping?", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "an engine runs loudly"], "sample_ids": ["zofjfKhqLk8", "vqZuVbG6-HI"], "start_seconds": ["10", "130"], "properties": ["background, metal, clank", "loud, engine, run"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["paper is crumpling consistently", "some men converse over an engine running"], "sample_ids": ["v5cSxLaHADY", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "men, converse, engine"], "captions_pred_video": ["footage of the person holding a pair of scissors", null], "captions_pred_audio": ["paper is crumpled and crinkled", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video", "label": 1}, {"captions": ["long loud burping by a man", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["xmiUIOhtZyQ", "sLUnaPT5gM8"], "start_seconds": ["60", "0"], "properties": ["loud, burp, man", "loud, laughter, intermittent"], "captions_pred_video": ["homer simpson drinking a beer", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a person burps and music plays in the background ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wqN6IIHw3po", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["rain, surface, fall", "three men, wind, flow"], "captions_pred_video": ["in your own words 
what is happening in this screenshot? blood splattered all over the place", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and water is splashing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking while rain falls onto a hard surface?", "label": 0}, {"captions": ["a toilet flushes and water drains", "roadway noise occurs and a truck accelerates"], "sample_ids": ["sfAvvZwdLCY", "tgbONvsP47Y"], "start_seconds": ["20", "0"], "properties": ["water drains, flushes, water", "noise, truck, accelerate"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a toilet is flushed", "a car is driving on the road "], "question": "which entity is a source of noise", "label": 1}, {"captions": ["a woman speaks followed by clicks and scraping", "pigeons vocalize and birds chirp"], "sample_ids": ["yYJksgsxx5U", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["audio, clicks, scraping", "vocalize, bird, chirp"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "of the pigeon in the cage"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a man speaks over intermittent keyboard taps", "birds chirp and objects are moved around"], "sample_ids": ["tw76HGONaKg", "yPUYU6t3rwo"], "start_seconds": ["570", "370"], "properties": ["audio, man, keyboard", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of 
zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "waves crash against a shoreline and people speak"], "sample_ids": ["slZLHwNbbt4", "yFB25fqfU8I"], "start_seconds": ["300", "300"], "properties": ["a, horn, run", "wave, crash, shoreline"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a clock ticktocks briefly", "a speedboat passes quickly on the water"], "sample_ids": ["u7C-AEBQM", "tjmoSi330GM"], "start_seconds": ["30", "23"], "properties": ["ticktocks, clock, ticktocks briefly", "speed, water, boat"], "captions_pred_video": [null, "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["a ticktock of a clock", "a motorboat speeds through water with wind noise "], "question": "which is moving faster", "label": 0}, {"captions": ["a man makes an exclamation, then another man speaks", "water flows as a woman laughs and a man speaks"], 
"sample_ids": ["xO-Q2BlIIPU", "vddP56-ogds"], "start_seconds": ["30", "30"], "properties": ["two men, exclamation, speak", "water, flow, laugh"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "water is running and gurgling and a man is speaking"], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a dog barks and whimpers", "frogs croak and vocalize"], "sample_ids": ["sShpyu2l4YQ", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["barks, whimpers, dog", "croak, vocalize, frog"], "captions_pred_video": ["the puppies are playing with a toy", "a close up of a frog in the water"], "captions_pred_audio": ["a dog is barking and growling", "a frog is croaking"], "question": "which animal is more vocal", "label": 1}, {"captions": ["a man speaks while water drains", "a telephone rings followed by a woman talking"], "sample_ids": ["vSeGhaZt-aI", "tGcFnX0GHI"], "start_seconds": ["50", "0"], "properties": ["water, drain, man", "ring, talk, woman"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["u5RmF3c3Aw", "zl9Dqx-j7q4"], "start_seconds": ["60", "6"], "properties": ["engine, car, zoom", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a jet engine roars "], "question": "which entity is followed by a car zooming by", "label": 0}, {"captions": ["rain falls on a surface as 
men speak and thunder roars", "a clock ticktocks in wind"], "sample_ids": ["w0xsN8X18Y", "yVumC9TGknc"], "start_seconds": ["30", "30"], "properties": ["rain, thunder, surface", "ticktocks, clock, wind"], "captions_pred_video": [null, "game title screen of the game shadow of the colossus on sony playstation 2"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a series of beeps and chirps"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["some tunes played by whistling", "a child speaks in closed space"], "sample_ids": ["u6BnG6YZqJ4", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["tune, play, whistling", "child, space, speak"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person whistling a song", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vBslzh7saPw", "su6FAOcOA8c"], "start_seconds": ["90", "4"], "properties": ["power, scream, increase", "engine, idle, woman"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a woman is speaking and a subway train is moving "], "question": "which engine is quieter", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "vehicles pass by on a roadway"], "sample_ids": ["rwtmaKiCcQU", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["nozzle, depressed, spray can", "pass, vehicle, roadway"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "footage of a 
fire truck entering a garage"], "captions_pred_audio": ["spraying and people speaking", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xl2PIWyXaM", "tDVADusiIoc"], "start_seconds": ["160", "60"], "properties": ["chirp, man, younger person", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["birds are chirping and people are talking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "someone whistles a tune"], "sample_ids": ["t69a8aRKhmc", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["a, b, c", "someone, tune, whistle"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a person whistling a song"], "question": "which entity has a tune", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "an engine runs loudly"], "sample_ids": ["wAAkbZToh8", "vqZuVbG6-HI"], "start_seconds": ["0", "130"], "properties": ["burp, laugh, speak", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man burps and a woman speaks", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["birds fly and flutter around", "pigeons vocalize and birds chirp"], "sample_ids": ["wGKgwOP3h30", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["fly, flutter, around", "vocalize, bird, chirp"], "captions_pred_video": ["of the pigeons in the coop", "of the pigeon in the cage"], 
"captions_pred_audio": ["pigeons coo and flap their wings", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a toilet flushes and a female speaks"], "sample_ids": ["wz7N8YRy74I", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["rooster, crow, background, people", "female, flushes, toilet"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a toilet flushes and a man speaks"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a man speaks as a car is passing by"], "sample_ids": ["spYNpeN7rPY", "sK4u5T8hW78"], "start_seconds": ["1", "30"], "properties": ["a clock, ticktock, man", "a, car, pass"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a clock?", "label": 0}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "small dogs yip and bark sharply"], "sample_ids": ["y2ZBGpgbhHM", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["birds, tweet, pant", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["birds chirping and a dog panting", "a 
dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "an engine runs loudly"], "sample_ids": ["zl9Dqx-j7q4", "vqZuVbG6-HI"], "start_seconds": ["6", "130"], "properties": ["engine, laugh, loud", "loud, engine, run"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a jet engine roars ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vddP56-ogds", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["water, flow, laugh", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "an airplane engine spools and people speak"], "sample_ids": ["uYT5gxnyMWM", "wTjoRj1se3U"], "start_seconds": ["50", "390"], "properties": ["female, spraying, scream", "airplane, engine, spool"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a jet engine is running and people are talking"], "question": "which entity is a video of a person speaking and then spraying and screaming?", "label": 0}, {"captions": ["multiple people speak and children yell while water gurgles", "paper folding and crinkling"], "sample_ids": ["vb1fPSDI4c", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["multiple, people, 
yell", "paper, fold, crinkle"], "captions_pred_video": [null, "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a crowd of people are talking and laughing", "the wind blows and a mouse clicks "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["xC8kbrKJmco", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["background, goat, scream", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a goat is bleating ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "water flows as men speak and yell"], "sample_ids": ["wvKpEYswXO0", "vJ7JPEFhyLA"], "start_seconds": ["150", "16"], "properties": ["plastic, tap, speak", "water, flow, men"], "captions_pred_video": ["of the person preparing food in the kitchen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and 
water running ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a liquid", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "paper folding and crinkling"], "sample_ids": ["xjhAnI2q6hM", "zPpG3RD8lSs"], "start_seconds": ["6", "20"], "properties": ["engine revs, vehicle, people", "paper, fold, crinkle"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "the wind blows and a mouse clicks "], "question": "which entity is a static object", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "a infant makes noise and is excited"], "sample_ids": ["vms5XGTDVQc", "wIJK3-5y0kA"], "start_seconds": ["220", "30"], "properties": ["paper, crumpled, crinkled", "noise, excited, infant"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["paper is crumpled and crinkled", "a baby cries and a woman speaks"], "question": "which entity is making noise", 
"label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["yDoT73BWsdA", "vfYTJq7nU"], "start_seconds": ["10", "130"], "properties": ["engine, revs, vehicle", "rustling, ducks, quack"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "an engine starts and increases in power"], "sample_ids": ["spJCm8tD9Zo", "zjTG0gaGCUI"], "start_seconds": ["90", "80"], "properties": ["snores, wheezes, sleeps", "power, increase, engine"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a jet engine roars as wind blows "], "question": "which entity is a source of power", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "a person snores loudly multiple times at a close distance"], "sample_ids": ["w2JXXIAdUdg", "sSMl2vc3ek"], "start_seconds": ["10", "20"], "properties": ["emits, sleeping, person", "loud, multiple, distance"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", null], "captions_pred_audio": ["a person snoring and a dog whimpering", "a person snoring loudly"], "question": "which person is emitting a snore", "label": 1}, {"captions": ["a clock ticktocks briefly", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["u7C-AEBQM", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["ticktocks, clock, ticktocks briefly", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a ticktock 
of a clock", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking and spraying?", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a woman speaks happily and an animal chirps"], "sample_ids": ["wRBHTgrbiwg", "uWAAAL4CIoc"], "start_seconds": ["50", "0"], "properties": ["bird, owl, speak", "a woman, chirps, animal"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a woman is speaking and a dog is barking "], "question": "which entity has a man speaking briefly?", "label": 0}, {"captions": ["a male is speaking and a duck quacks as others laugh", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["tiDFTC-5vU", "xV7Mg1QucSc"], "start_seconds": ["30", "14"], "properties": ["male, duck, laugh", "alarm, ticktocks, laughs"], "captions_pred_video": [null, "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "an alarm clock ticks and a woman laughs"], "question": "which entity is about a clock ticktocking and a man laughing?", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a motorcycle engine is revving while people are speaking"], "sample_ids": ["sxYkFKFIZD0", "y8dSeubCNI"], "start_seconds": ["20", "4"], "properties": ["screech, man, door", "engine revving, people speaking, motorcycle"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the 
background ", "an engine revving and people talking in the background"], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["waves of water rumble", "a mechanical buzzing getting louder"], "sample_ids": ["vwqaIHKxLvM", "sEprKHm8Sj8"], "start_seconds": ["20", "90"], "properties": ["sound, wave, water", "noise, loud, buzzing"], "captions_pred_video": ["of a surfer riding a big wave in the ocean", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["waves crash and wind blows ", "a race car accelerates and revs its engine "], "question": "which entity is a noise", "label": 1}, {"captions": ["an insect buzzes around continuously", "people speaking indiscriminately in the distance with a person snoring loudly nearby"], "sample_ids": ["v25l1jef3JY", "w2JXXIAdUdg"], "start_seconds": ["0", "10"], "properties": ["buzzes, continuously, insect", "snoring, distance, person"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "a close up shot of a person's mouth with a toothbrush in it"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a person snoring and a dog whimpering"], "question": "which entity is louder", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vfYTJq7nU", "ukg5L09Wpvo"], "start_seconds": ["130", "150"], "properties": ["rustling, ducks, quack", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a train 
blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["ukxt9I7eMMg", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["food, pan, cook", "loud, multiple, distance"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a person snoring loudly"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a helicopter engine runs continuously", "an insect buzzes around continuously"], "sample_ids": ["ugHJF0hfYkg", "v25l1jef3JY"], "start_seconds": ["10", "0"], "properties": ["engine, running, continuously", "buzzes, continuously, insect"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a helicopter is flying overhead ", "a fly is buzzing around a microphone "], "question": "which entity is not a helicopter?", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "waves crash against a shoreline and people speak"], "sample_ids": ["spJCm8tD9Zo", "yFB25fqfU8I"], "start_seconds": ["90", "300"], "properties": ["snores, wheezes, sleeps", "wave, crash, shoreline"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more calm", "label": 0}, {"captions": ["men speak and a nozzle sprays liquid", "a vehicle engine accelerates and wind blows"], "sample_ids": ["wRV8yMk886E", "wudZTNBtVqc"], "start_seconds": ["0", "60"], "properties": ["liquid, spray, nozzle", "accelerates, engine, wind"], 
"captions_pred_video": ["two cars are parked in a parking lot at night", "footage is of a parking lot with cars parked in it"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["people speak then an engine runs", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["uMTTDZ2mb4", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["engine, run, people", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program playing?", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wvKpEYswXO0", "wz7N8YRy74I"], "start_seconds": ["150", "30"], "properties": ["water, tap, run", "rooster, crow, background, men"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is about a rooster?", "label": 1}, {"captions": ["a train engine runs and a horn blows", "a child speaks in closed space"], "sample_ids": ["zPX9o1uDiI", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["engine, horn, run", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a woman is speaking with 
background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a stream of water runs briefly"], "sample_ids": ["zcDwZ6W7E3E", "x-PeY8Yb8M4"], "start_seconds": ["180", "300"], "properties": ["man, speak, motorcycles", "stream, water, run"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["someone snores nearby", "a stream of water runs briefly"], "sample_ids": ["spJCm8tD9Zo", "x-PeY8Yb8M4"], "start_seconds": ["90", "300"], "properties": ["someone snores, nearby, someone", "stream, water, run"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a person is snoring loudly", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "winds blows roughly as a vehicle races past"], "sample_ids": ["vzxHnu-SFEw", "xjvTpk2Zpr8"], "start_seconds": ["80", "70"], "properties": ["two objects, woman, speak", "wind, blows, vehicle"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to 
make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["male speech with light ticking", "a telephone rings followed by a woman talking"], "sample_ids": ["xO-Q2BlIIPU", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["male, speech, ticking", "ring, talk, woman"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a cat meows and children speak", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["x5cuQjOdM3E", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["cat, speak, children", "a woman, laughs, animal"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "water pouring and bubbling"], "sample_ids": ["v5P-ThUCINM", "uyRfq-jKPpo"], "start_seconds": ["400", "50"], "properties": ["background, chirp, bird", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an 
afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and birds are chirping", "water is running from a faucet"], "question": "which entity is more likely to be in a bathroom", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["tDVADusiIoc", "tDVADusiIoc"], "start_seconds": ["60", "60"], "properties": ["man, radio, blows", "water, radio, man"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be in a storm", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "a machine beeps continuously"], "sample_ids": ["vveS8HT7Uog", "y682ml90jGw"], "start_seconds": ["100", "11"], "properties": ["a man, objects, speak", "beeps, machine, continuously"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a beeping sound 
is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "an infant crying as a woman laughs"], "sample_ids": ["uPDn2BFTHk", "xhmRY9yhC7c"], "start_seconds": ["140", "20"], "properties": ["woman, laughs, speaks", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a baby cries and a woman speaks"], "question": "which woman is laughing", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "wind blows and women speak as livestock vocalizes"], "sample_ids": ["vb1fPSDI4c", "vXlk0lIQBFo"], "start_seconds": ["30", "470"], "properties": ["multiple, people, yell", "wind, speak, vocalize"], "captions_pred_video": [null, "- a woman and two donkeys in a fenced in area"], "captions_pred_audio": ["a crowd of people are talking and laughing", "wind chimes are ringing and people are speaking and laughing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "birds chirp and objects are moved around"], "sample_ids": ["x6ijhqRY38s", "yPUYU6t3rwo"], "start_seconds": ["250", "370"], "properties": ["something metal, glass, hit", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp and wind blows", "several insects fly while two men talk"], "sample_ids": ["sxIvBMSavMQ", "s-T9OVOiMLo"], "start_seconds": ["210", "330"], "properties": ["birds, chirp, wind", "several, fly, men"], "captions_pred_video": ["beekeeping 101 how to extract honey from 
a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a man speaks followed by another man speaking outside"], "sample_ids": ["zTLVJCo4WEE", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["a, crickets, sing", "two men, speak, follow"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["a drill runs and two people laugh", "an engine runs loudly"], "sample_ids": ["tEE3MpBt1sg", "vqZuVbG6-HI"], "start_seconds": ["50", "130"], "properties": ["two people, laugh, drill", "loud, engine, run"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage is blurry because it's raining outside"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a lawn mower is running 
and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sTpirNYo8vQ", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["a, tone, fast", "engine, idle, woman"], "captions_pred_video": ["of a man taking a selfie on a bus", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a woman is speaking and a subway train is moving "], "question": "which woman is speaking", "label": 1}, {"captions": ["a weapon fires multiple times", "several insects fly while two men talk"], "sample_ids": ["sMC07Ucy7kg", "s-T9OVOiMLo"], "start_seconds": ["10", "330"], "properties": ["weapon, fire, multiple", "several, fly, men"], "captions_pred_video": ["footage is from a car's point of view", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is not a weapon", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xZepNM9qcRA", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["background, motor, run", "airplane, boy, fly"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a motor running in the background?", "label": 0}, {"captions": ["a rumble grows louder", "water pouring and bubbling"], "sample_ids": 
["y4MY9mp8-TA", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["loudness, increase, rumble", "water, bubbles, pouring"], "captions_pred_video": ["a helicopter flying in the sky", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a helicopter flies overhead ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xyL9F5VrjkE", "zj2R0XoFr5k"], "start_seconds": ["20", "50"], "properties": ["wind, motor, distance", "airplane, boy, fly"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "three men talk while wind blows and some liquid flows"], "sample_ids": ["w34HjHr6gAY", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["beeps, squawk, child speaking", "three men, wind, 
flow"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "some men converse over an engine running"], "sample_ids": ["tOj4tdLRaA", "sCiy7QS1U"], "start_seconds": ["70", "300"], "properties": ["woman, laugh, baby", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a woman speaking and a baby laughing?", "label": 0}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wnpJndXuxLc", "vfYTJq7nU"], "start_seconds": ["50", "130"], "properties": ["beeps, loud, whistle", "rustling, ducks, quack"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a duck quacks and a woman speaks"], "question": "which entity is more natural", "label": 1}, {"captions": ["frogs croak and vocalize", "someone whistles a tune"], "sample_ids": 
["yswmmRZFItk", "sIXTftIuUgw"], "start_seconds": ["0", "90"], "properties": ["croak, vocalize, frog", "someone, tune, whistle"], "captions_pred_video": ["a close up of a frog in the water", null], "captions_pred_audio": ["a frog is croaking", "a person whistling a song"], "question": "which entity is a human", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a frog croaks as other frogs croak in the background"], "sample_ids": ["wvKpEYswXO0", "yswmmRZFItk"], "start_seconds": ["150", "0"], "properties": ["sound, water, running", "background, frog, croak"], "captions_pred_video": ["of the person preparing food in the kitchen", "a close up of a frog in the water"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a frog is croaking"], "question": "which entity has a background of frogs?", "label": 1}, {"captions": ["a child babbles as a woman speaks", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wEBlkGWVWwE", "sLUnaPT5gM8"], "start_seconds": ["260", "0"], "properties": ["a, babble, woman", "loud, laughter, intermittent"], "captions_pred_video": ["shows a person writing on the whiteboard", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a clock ticktocks"], "sample_ids": ["sEprKHm8Sj8", "v-g-j2uTByM"], "start_seconds": ["90", "30"], "properties": ["car, tires, slows", "ticktocks, clock, ticktocks"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 
2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "a man speaks as a motor runs in the background"], "sample_ids": ["y1saVTXsKwc", "xZepNM9qcRA"], "start_seconds": ["80", "30"], "properties": ["a, dog, talk", "background, motor, run"], "captions_pred_video": ["a dog playing with a pink ball", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a dog barks and a man speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["tEE3MpBt1sg", "yDoT73BWsdA"], "start_seconds": ["50", "10"], "properties": ["drill, something, laugh", "engine, revs, vehicle"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a race car accelerates and revs its engine "], "question": "which is a vehicle", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "pigeons vocalize and birds chirp"], "sample_ids": ["s7knHCFW82w", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["blow horn, get close, train", "vocalize, bird, chirp"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "of the pigeon in the cage"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a man is speaking and a bee is buzzing"], "question": "which entity is 
not a train?", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["xO-Q2BlIIPU", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["two men, exclamation, speak", "three men, wind, flow"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people", "label": 1}, {"captions": ["a small engine idles continuously", "a horn rings out as a machine runs by"], "sample_ids": ["y5WII6cTH7k", "slZLHwNbbt4"], "start_seconds": ["40", "300"], "properties": ["engine, idle, continuously", "a, horn, run"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is moving", "label": 1}, {"captions": ["a car accelerates and wind blows", "someone is typing on a computer keyboard"], "sample_ids": ["u0TrcHhkPQ", "v0x1odnXtP0"], "start_seconds": ["20", "210"], "properties": ["accelerates, wind, blows", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a person is typing on a keyboard"], "question": "which object is moving", "label": 0}, {"captions": ["a dog barks and whimpers", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sShpyu2l4YQ", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["barks, whimpers, dog", "engine, idle, woman"], "captions_pred_video": ["the 
puppies are playing with a toy", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking and a subway train is moving "], "question": "which entity is a human", "label": 1}, {"captions": ["people speak as gunfire rings out", "an engine runs loudly"], "sample_ids": ["wqTCwqVRDlk", "vqZuVbG6-HI"], "start_seconds": ["80", "130"], "properties": ["gunfire, ring, speak", "loud, engine, run"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "music plays followed by gunshots and then an explosion"], "sample_ids": ["uC9dtII1KDI", "xKB8O8LTs6s"], "start_seconds": ["150", "70"], "properties": ["wind, gusts, distance", "music, gunshots, explosion"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["male speech with light ticking", "a vehicle is skidding and squealing tires"], "sample_ids": ["xO-Q2BlIIPU", "soTOh3zYJfY"], "start_seconds": ["30", "40"], "properties": ["male, speech, ticking", "vehicle, skid, tires"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a red car drifting on a winding road with smoke coming out of it"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", 
"label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "some men converse over an engine running"], "sample_ids": ["xfudFO976zE", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["animal, bleats, cry", "men, converse, engine"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a mechanical buzzing getting louder", "paper folding and crinkling"], "sample_ids": ["sEprKHm8Sj8", "zPpG3RD8lSs"], "start_seconds": ["90", "20"], "properties": ["noise, loud, buzzing", "paper, fold, crinkle"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "the wind blows and a mouse clicks "], "question": "which entity is not a 
noise", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["y2ZBGpgbhHM", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["birds, tweet, pant", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["birds chirping and a dog panting", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sa6TLVbooCc", "uYT5gxnyMWM"], "start_seconds": ["240", "50"], "properties": ["people, laugh, child", "a, scream, girl"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a woman is speaking and a baby is crying"], "question": "which entity has a child speaking?", "label": 0}, {"captions": ["animals bleat and cry out and then a woman speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["yZp6xizR0yU", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["animal, bleat, cry", "people, applaud, hoot"], "captions_pred_video": ["footage of a woman feeding goats in a barn", null], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds fly and flutter around", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wGKgwOP3h30", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["fly, flutter, around", "applause, audience, yells"], 
"captions_pred_video": ["of the pigeons in the coop", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["pigeons coo and flap their wings", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a drill runs and two people laugh", "dishes cling together then a man begins to speak"], "sample_ids": ["tEE3MpBt1sg", "sQGXqGcwOTc"], "start_seconds": ["50", "3"], "properties": ["two people, laugh, drill", "cling, speak, dishes"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "mechanisms are operating and water is splashing "], "question": "which entity is about a drill?", "label": 0}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wSVhSdj0F0", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["beep, clang, footsteps", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["a motorcycle engine is idling", "a machine beeps continuously"], "sample_ids": ["vZAqdHZ81yA", "y682ml90jGw"], "start_seconds": ["180", "11"], "properties": ["engine, motorcycle, idling", "beeps, machine, continuously"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", null], "captions_pred_audio": ["an engine is idling loudly", "a beeping sound is being made "], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "a 
person snores loudly multiple times at a close distance"], "sample_ids": ["vMf1dLD6Sng", "sSMl2vc3ek"], "start_seconds": ["6", "20"], "properties": ["frog, bird, vocalize", "loud, multiple, distance"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", null], "captions_pred_audio": ["a frog croaks loudly", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "several insects fly while two men talk"], "sample_ids": ["vddP56-ogds", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["liquid, laughs, man", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more insects", "label": 1}, {"captions": ["a woman and man speak while food is frying", "people speak as gunfire rings out"], "sample_ids": ["zk-xJGQU8-4", "wqTCwqVRDlk"], "start_seconds": ["130", "80"], "properties": ["food, man, woman", "gunfire, ring, speak"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "multiple people speak and children yell while water gurgles"], "sample_ids": ["uC9dtII1KDI", "vb1fPSDI4c"], "start_seconds": ["150", "30"], "properties": ["wind, gusts, distance", "multiple, people, yell"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", null], "captions_pred_audio": ["a woman is speaking with wind noise and 
breathing in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["an engine starts and increases in power", "two men speak as a buffeting wind blows"], "sample_ids": ["zjTG0gaGCUI", "y8WEcpOlT3I"], "start_seconds": ["80", "40"], "properties": ["power, increase, engine", "wind, speak, buffeting"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a man is speaking with wind noise in the background "], "question": "which entity is not a person?", "label": 0}, {"captions": ["multiple adults speaking, and a child shouting in the background", "a toilet flushes and water drains unevenly"], "sample_ids": ["yks4cLgIDMc", "vhJWZheqaE"], "start_seconds": ["170", "0"], "properties": ["background, speaking, child", "water drains unevenly, toilet flushes, water drains"], "captions_pred_video": ["footage of two kids wrestling on the floor", null], "captions_pred_audio": ["a man is speaking and a child is crying", "a toilet is flushed"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "an airplane flies overhead as a woman speaks"], "sample_ids": ["wz7N8YRy74I", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["rooster, crow, background, men", "airplane, fly, overhead"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying overhead", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "several ducks quack and cocks crow far away"], "sample_ids": 
["y2ZBGpgbhHM", "sNB8zxXneIM"], "start_seconds": ["30", "20"], "properties": ["birds, tweet, pant", "several, quack, cocks"], "captions_pred_video": [null, "a group of geese in a cage"], "captions_pred_audio": ["birds chirping and a dog panting", "a rooster is crowing and wind is blowing "], "question": "which entity is about birds?", "label": 0}, {"captions": ["an animal quacks rapidly", "water pouring and bubbling"], "sample_ids": ["vh30P49Po6s", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["animal, quacks, rapidly", "water, bubbles, pouring"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a duck is quacking loudly", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a dark barks and whimpers", "small dogs yip and bark sharply"], "sample_ids": ["sYj4hpDUZDQ", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["barks, whimpers, dark", "bark, yip, sharply"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a dog barks and a cat meows", "a dog barks and 
growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["w5W5Kqtc8E", "su6FAOcOA8c"], "start_seconds": ["100", "4"], "properties": ["water, splashes, motorboat", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["w5W5Kqtc8E", "wz7N8YRy74I"], "start_seconds": ["100", "30"], "properties": ["wind, engine, scream", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman sneezes then speaks", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["x4dZyf9Gbj0", "sLUnaPT5gM8"], "start_seconds": ["130", "0"], "properties": ["sneezes, speaks, woman", "loud, laughter, intermittent"], "captions_pred_video": ["footage is blurry and out of focus", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman sneezes and speaks", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more like a sneeze", "label": 0}, {"captions": ["a woman speaks with water running", "people applaud and hoot and chat quietly"], "sample_ids": ["wTideSjRFS0", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["water, running, woman", 
"people, applaud, hoot"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["ticking continues without interruption", "water pouring and bubbling"], "sample_ids": ["v-g-j2uTByM", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["ticking, continuous, clock", "water, bubbles, pouring"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a clock is ticking loudly", "water is running from a faucet"], "question": "which entity is not continuous", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "a man speaks over intermittent keyboard taps"], "sample_ids": ["x9JovgqUcs", "tw76HGONaKg"], "start_seconds": ["500", "570"], "properties": ["a, man, speaks, keyboard", "audio, man, keyboard"], "captions_pred_video": [null, "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend 
of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a man speaks and types on a computer keyboard "], "question": "which entity is a video?", "label": 0}, {"captions": ["a man speaking with light rustling", "birds chirp and objects are moved around"], "sample_ids": ["zOZleIRqZm4", "yPUYU6t3rwo"], "start_seconds": ["80", "370"], "properties": ["light, rustling, man", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "vehicles pass by on a roadway"], "sample_ids": ["shmR4OZtzqA", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["man, engine, idle", "pass, vehicle, roadway"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube 
how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man speaks while a motor runs", "a car is driving on the road "], "question": "which vehicle is moving", "label": 1}, {"captions": ["a person speaks briefly", "a car accelerates and wind blows"], "sample_ids": ["zOZleIRqZm4", "u0TrcHhkPQ"], "start_seconds": ["80", "20"], "properties": ["person, talk, brief", "accelerates, wind, blows"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a race car accelerates and revs its engine "], "question": "which is not a person", "label": 1}, {"captions": ["a person is whistling a tune", "wind blows as people chatter quietly"], "sample_ids": ["scYRUkrFLiQ", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["a, tune, whistle", "wind, chatter, people"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "footage is blurry and out of focus"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a chime of a clock followed by various tones of ticking with come clinking"], "sample_ids": ["zY3icUyMdh8", "uqFtmnhuqA8"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "a, b, c"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "shows a clock on the wall of a room with a person 
standing in front of it"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "mechanisms are ticking and a hammer is striking "], "question": "which entity is a clock?", "label": 1}, {"captions": ["a door opens and closes", "a person sneezes followed by another person speaking"], "sample_ids": ["vBHyYJ8pL0", "t8CV69hcvF0"], "start_seconds": ["2", "210"], "properties": ["open, close, door", "person, sneeze, follow"], "captions_pred_video": [null, "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a woman sneezes and speaks"], "question": "which entity is a follow up to something else?", "label": 1}, {"captions": ["ticking continues without interruption", "a clock ticktocks"], "sample_ids": ["v-g-j2uTByM", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["ticking, continuous, clock", "ticktocks, clock, ticktocks"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a clock is ticking loudly", "a clock is ticking loudly"], "question": "which clock ticks continuously", "label": 0}, {"captions": ["an engine idles quietly then gradually becomes louder", "loud clanking and banging with brief male speech"], "sample_ids": ["vbr9mHKc8WM", "sWZzXuWYY"], "start_seconds": ["40", "420"], "properties": ["noise, loudness, engine", "male, speech, banging"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine is idling", "a sewing machine runs and a man speaks"], "question": "which entity is louder", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zY3icUyMdh8", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["dog, bark, engine", "female, spraying, scream"], 
"captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a car accelerates and wind blows", "a man talks as something metal hits against and glass is set down"], "sample_ids": ["u0TrcHhkPQ", "x6ijhqRY38s"], "start_seconds": ["20", "250"], "properties": ["accelerates, wind, blows", "something metal, glass, hit"], "captions_pred_video": [null, "a chef preparing a dish with a bottle of wine and a plate of food on a table"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and dishes are clanging "], "question": "which entity is a demonstration of a force", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "water splashes and a door squeaks"], "sample_ids": ["weDbePuc-Xc", "sdXV-ylviw"], "start_seconds": ["40", "190"], "properties": ["cartoon character, music, vocalize", "sound, splash, door"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", null], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a dog barks and taps with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["uYT5gxnyMWM", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["female, spraying, scream", "music, gunfire, explosion"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking and a baby 
is crying", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a man speaks as a car is passing by"], "sample_ids": ["xjvTpk2Zpr8", "sK4u5T8hW78"], "start_seconds": ["70", "30"], "properties": ["engine, run, wind", "a, car, pass"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is stationary", "label": 1}, {"captions": ["someone snores nearby", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["spJCm8tD9Zo", "uEU-Hg5MTN8"], "start_seconds": ["90", "27"], "properties": ["someone snores, nearby, someone", "a woman, laughs, animal"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["t97k0cejSQE", "uZesmtKZGSw"], "start_seconds": ["250", "250"], "properties": ["bird, chirp, insect", "men, talk, cars"], "captions_pred_video": ["a bee on a purple thistle flower", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 
1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a drill runs and two people laugh", "a telephone rings followed by a woman talking"], "sample_ids": ["tEE3MpBt1sg", "tGcFnX0GHI"], "start_seconds": ["50", "0"], "properties": ["two people, laugh, drill", "ring, talk, woman"], "captions_pred_video": ["footage is blurry due to the smoke in the air", null], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sSMl2vc3ek", "sSMl2vc3ek"], "start_seconds": ["20", "20"], "properties": ["loud, multiple, distance", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person snoring loudly", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["xjhAnI2q6hM", "wDVMhEdTiVw"], "start_seconds": ["6", "30"], "properties": ["wind, blow, loudly", "gun, shoot, water"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", 
"a blurry image of trees and water in the forest"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause damage", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["zALy31PjDl0", "uZesmtKZGSw"], "start_seconds": ["21", "250"], "properties": ["a man, a vehicle, a horn", "men, talk, cars"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["several ducks are quacking and squawking", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["wfHeoPDLMaM", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["quacking, squawking, ducks", "background, birds, rustling"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in 
the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "birds are chirping and a chime is ringing "], "question": "which entity is a bird", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "rain falls on a surface as men speak and thunder roars"], "sample_ids": ["w-4gHptFNuU", "w0xsN8X18Y"], "start_seconds": ["21", "30"], "properties": ["engine revs, accelerates, bump", "rain, thunder, surface"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while a motorboat is moving in the background "], "question": "which entity is a natural occurrence", "label": 1}, {"captions": ["a man talks as several small engines run", "a woman speaks as she rubs two objects together"], "sample_ids": ["u9A6VZQCZpU", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["a, man, talk", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a heavy rain falls endlessly", "an airplane engine roars increasingly louder"], "sample_ids": ["wP8ZKrlx3oA", "vBslzh7saPw"], "start_seconds": ["40", "90"], "properties": ["heavy, rain, fall", "engine, roar, louder"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a jet engine roars and accelerates "], "question": "which entity is louder", "label": 1}, {"captions": ["a clock ticktocks in wind", "plastic is tapped on while someone speaks"], "sample_ids": ["yVumC9TGknc", "wvKpEYswXO0"], "start_seconds": ["30", "150"], "properties": ["ticktocks, clock, wind", "plastic, tap, speak"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a series of beeps and chirps", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["a man speaks as a machine runs", "a man speaks as a motor runs in the background"], "sample_ids": ["vD6lYD1l0BY", "xZepNM9qcRA"], "start_seconds": ["330", "30"], 
"properties": ["a, machine, run", "background, motor, run"], "captions_pred_video": ["game controller being held in the hands of the person", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man speaks while a motorcycle revs and accelerates "], "question": "which machine runs in the background", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "a woman speaks and then a man speaks"], "sample_ids": ["xyL9F5VrjkE", "vbpKkWvfOu4"], "start_seconds": ["20", "560"], "properties": ["engine, run, wind", "a, man, speaks"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking and a man is speaking"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "someone whistles a tune"], "sample_ids": ["vveS8HT7Uog", "sIXTftIuUgw"], "start_seconds": ["100", "90"], "properties": ["a man, objects, speak", "someone, tune, whistle"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["speaking following by laughing and clapping", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["u2f5NpsoHBg", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["person, laugh, clap", "men, talk, cars"], "captions_pred_video": ["is being 
projected on a screen at the front of the stage", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity shows a person speaking?", "label": 0}, {"captions": ["food fries in a pan as someone talks and cooks", "a infant makes noise and is excited"], "sample_ids": ["ukxt9I7eMMg", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["food, pan, cook", "noise, excited, infant"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "soft movement is accompanied by clocks ticking in the background"], "sample_ids": ["siJFXfGWgDk", "vlJS7LN2XyM"], "start_seconds": ["50", "30"], "properties": ["a, bird, vehicle", "background, clocks, ticking"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a ticktock of a clock"], 
"question": "which entity has a vehicle in the background?", "label": 0}, {"captions": ["engines sputter roughly and tires squeal", "a telephone rings followed by a woman talking"], "sample_ids": ["zhx6hoYrHeI", "tGcFnX0GHI"], "start_seconds": ["160", "0"], "properties": ["engine, sputter, rough", "ring, talk, woman"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "an engine runs loudly"], "sample_ids": ["wSVhSdj0F0", "vqZuVbG6-HI"], "start_seconds": ["10", "130"], "properties": ["horn honks, keys jingle, slam", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a motorcycle engine is idling", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vZAqdHZ81yA", "xfaoyyzw2WU"], "start_seconds": ["180", "180"], "properties": ["engine, motorcycle, idling", "loud, jet engine, roar"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["an engine is idling loudly", "an aircraft engine roars and a man speaks "], "question": "which engine is louder", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "a woman speaks happily and an animal chirps"], "sample_ids": ["xNMovAf3o50", "uWAAAL4CIoc"], "start_seconds": ["0", "0"], "properties": ["rain, thunder, music", "a woman, chirps, animal"], "captions_pred_video": ["tieng mua - the falling 
rain lynk lee", null], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an engine runs and a man speaks", "wind blows as people chatter quietly"], "sample_ids": ["yT5WfYMRr-U", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["engine, run, man", "wind, chatter, people"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["continuous snoring", "people applaud and hoot and chat quietly"], "sample_ids": ["sLkeqCDJIyw", "wwyfGO2J4"], "start_seconds": ["120", "90"], "properties": ["loud, snoring, noise", "people, applaud, hoot"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", null], "captions_pred_audio": ["a person is snoring loudly", "people are clapping and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "paper is crumpling consistently"], "sample_ids": ["vb1fPSDI4c", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["multiple, people, yell", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a crowd of people are talking and laughing", "paper is crumpled and crinkled"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "paper folding and crinkling"], "sample_ids": ["slZLHwNbbt4", "zPpG3RD8lSs"], "start_seconds": ["300", "20"], "properties": ["a, horn, run", "paper, fold, crinkle"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "the wind blows and a mouse clicks "], "question": 
"which is not a machine", "label": 1}, {"captions": ["an engine starts and increases in power", "a train engine runs and a horn blows"], "sample_ids": ["zjTG0gaGCUI", "zPX9o1uDiI"], "start_seconds": ["80", "40"], "properties": ["power, increase, engine", "engine, horn, run"], "captions_pred_video": [null, null], "captions_pred_audio": ["a jet engine roars as wind blows ", "a train moves with its horn blowing and wheels squealing "], "question": "which entity is a train engine?", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["wRBHTgrbiwg", "w34HjHr6gAY"], "start_seconds": ["50", "30"], "properties": ["bird, owl, speak", "beeps, hit, woman"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a beep sounds followed by a child speaking"], "question": "which entity has a man speaking briefly?", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vzxHnu-SFEw", "vJ7JPEFhyLA"], "start_seconds": ["80", "16"], "properties": ["two objects, woman, speak", "three men, wind, flow"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a man speaks while water trickles and flows", "some tunes played by whistling"], "sample_ids": ["sapQIQUhFc", "u6BnG6YZqJ4"], "start_seconds": ["280", "0"], "properties": ["water, trickles, flow", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["zj2R0XoFr5k", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["airplane, boy, fly", "a, scream, girl"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of a person spraying paint on the ceiling"], 
"captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking and a baby is crying"], "question": "which entity has a boy speaking?", "label": 0}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["tDlfY3nmx1A", "uEU-Hg5MTN8"], "start_seconds": ["160", "27"], "properties": ["applause, laugh, man", "animal, grunts, snorts"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 0}, {"captions": ["continuous chugging with birds chirping in the background", "a toilet flushes and a female speaks"], "sample_ids": ["xM4joTqDVp4", "yaln9y8I7ms"], "start_seconds": ["160", "230"], "properties": ["background, chirp, birds", "female, flushes, toilet"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["water rushes by", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["x-PeY8Yb8M4", "zj2R0XoFr5k"], "start_seconds": ["300", "50"], "properties": ["water, rushes, by", "airplane, boy, fly"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a car is driving on a wet road ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", 
"label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "three men talk while wind blows and some liquid flows"], "sample_ids": ["xKB8O8LTs6s", "vJ7JPEFhyLA"], "start_seconds": ["70", "16"], "properties": ["music, gunfire, explosion", "three men, wind, flow"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sAam2NqGhLY", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["snoring, breathing, child", "stream, water, flow"], "captions_pred_video": ["of a little girl sleeping on a couch", "footage is blurry and out of focus"], "captions_pred_audio": ["a person is snoring", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sapQIQUhFc", "wqZ135Ssz0"], "start_seconds": ["280", "60"], "properties": ["water, trickles, flow", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tDlfY3nmx1A", "tdWhHV3X25Q"], "start_seconds": ["160", "60"], "properties": ["applause, laugh, man", "applause, audience, 
yells"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a man is speaking and a crowd is clapping"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["an airplane engine runs", "pigeons vocalize and birds chirp"], "sample_ids": ["yVPZ2MNWpms", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["engine, airplane, runs", "vocalize, bird, chirp"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "of the pigeon in the cage"], "captions_pred_audio": ["a car is driving by on the road ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "a clock ticktocks"], "sample_ids": ["wAAkbZToh8", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["burp, laugh, speak", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man burps and a woman speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "dishes cling together then a man begins to speak"], "sample_ids": ["xZepNM9qcRA", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["background, motor, run", "cling, speak, dishes"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["heavy rain splashes as it falls", "water is sprayed across a hard surface"], 
"sample_ids": ["wP8ZKrlx3oA", "sQwlkXjQabo"], "start_seconds": ["40", "10"], "properties": ["fall, rain, splash", "water, spray, surface"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a heavy rain is falling on a surface", "spraying followed by silence"], "question": "which entity is a spray of water?", "label": 1}, {"captions": ["a person snoring several times", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["spJCm8tD9Zo", "wqZ135Ssz0"], "start_seconds": ["90", "60"], "properties": ["snore, person, several", "two men, woman, birds"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a person", "label": 0}, {"captions": ["a man speaks as water trickles down a stream", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sapQIQUhFc", "wz7N8YRy74I"], "start_seconds": ["280", "30"], "properties": ["water, stream, trickles", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in it?", "label": 1}, {"captions": ["some men converse over an engine running", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["sCiy7QS1U", "ukg5L09Wpvo"], "start_seconds": ["300", "150"], "properties": ["men, converse, engine", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt 
road"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a person screams glaringly"], "sample_ids": ["w2M4i1mklOA", "xC8kbrKJmco"], "start_seconds": ["30", "0"], "properties": ["loud, chime, bell", "glaringly, screams, person"], "captions_pred_video": ["footage of an antique clock", null], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a goat is bleating "], "question": "which entity is louder", "label": 1}, {"captions": ["a kid speaks followed by music playing", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tQWGZLItBXk", "uZesmtKZGSw"], "start_seconds": ["170", "250"], "properties": ["music, kid, speak", "men, talk, cars"], "captions_pred_video": ["worms revolution screenshots", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "a man speaks as a vehicles passes by then a woman speaks"], "sample_ids": ["vzxHnu-SFEw", "siJFXfGWgDk"], "start_seconds": ["80", "50"], "properties": ["two objects, woman, speak", "man, woman, 
vehicle"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking and birds are chirping in the background "], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["a man speaks as a car is passing by", "water flows as men speak and yell"], "sample_ids": ["sK4u5T8hW78", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["a, car, pass", "water, flow, men"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with background noise and breathing 
sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking as a car is passing by?", "label": 0}, {"captions": ["a young woman speaks and laughs and an animal snorts", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["uEU-Hg5MTN8", "xKB8O8LTs6s"], "start_seconds": ["27", "70"], "properties": ["a woman, laughs, animal", "music, gunfire, explosion"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more action", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["ylpYOorfH4o", "uEU-Hg5MTN8"], "start_seconds": ["410", "27"], "properties": ["motor, run, steady", "a woman, laughs, animal"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["loud clanking and banging with brief male 
speech", "a woman speaks followed by another woman whimpering and speaking"], "sample_ids": ["sWZzXuWYY", "xOZfdgAgJ9o"], "start_seconds": ["420", "40"], "properties": ["male, speech, banging", "woman, whimpering, speaking"], "captions_pred_video": [null, "footage of a woman talking to a man in a doctor's office"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a woman speaking?", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["sQGXqGcwOTc", "s7knHCFW82w"], "start_seconds": ["3", "30"], "properties": ["cling, speak, dishes", "blow horn, get close, train"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a train is blowing its horn and its wheels are squealing "], "question": "which entity is moving", "label": 1}, {"captions": ["a dog barks and whimpers", "a woman speaks as she rubs two objects together"], "sample_ids": ["sShpyu2l4YQ", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["barks, whimpers, dog", "two objects, woman, speak"], "captions_pred_video": ["the puppies are playing with a toy", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "water splashes as an animal walks through"], "sample_ids": ["yks4cLgIDMc", "w1ir-sZ3Im8"], "start_seconds": ["170", "90"], "properties": ["background, speaking, child", "animal, water, splashes"], "captions_pred_video": ["footage of two kids wrestling on the floor", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and a child is crying", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vzceMbklWc", "ukg5L09Wpvo"], "start_seconds": ["180", "150"], "properties": ["water, faucet, sink", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["water is running and a man is speaking", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "water is sprayed across a hard surface"], "sample_ids": ["uiItxDsDMFI", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["wood, piece, saw", "water, spray, surface"], "captions_pred_video": ["a man cutting a log with an axe in 
the woods", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a saw is being used with background noise ", "spraying followed by silence"], "question": "which entity is wetter", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "frogs croak and vocalize"], "sample_ids": ["xfudFO976zE", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["animal, bleats, cry", "croak, vocalize, frog"], "captions_pred_video": ["footage is blurry and shaky", "a close up of a frog in the water"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a frog is croaking"], "question": "which animal is more vocal", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "water splashes and a door squeaks"], "sample_ids": ["wz7N8YRy74I", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["rooster, crow, background, men", "sound, splash, door"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", null], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a dog barks and taps with background noise "], "question": "which entity has a door?", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "a stream of water runs briefly"], "sample_ids": ["yVumC9TGknc", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["humming, clock, birds", "stream, water, run"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a series of beeps and chirps", "a car is driving on a wet road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["birds chirp and wind blows", "a man speaks as a car is passing by"], "sample_ids": ["sxIvBMSavMQ", "sK4u5T8hW78"], "start_seconds": ["210", "30"], "properties": ["birds, 
chirp, wind", "a, car, pass"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "an airplane engine spools and people speak"], "sample_ids": ["smDKStoHBJo", "wTjoRj1se3U"], "start_seconds": ["0", "390"], "properties": ["a, talk, baby, cry", "airplane, engine, spool"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a jet engine is running and people are talking"], "question": "which entity is a video of a person talking?", "label": 0}, {"captions": ["a man speaks while rain 
falls onto a hard surface", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["wqN6IIHw3po", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["rain, surface, fall", "gun, shoot, water"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and water is splashing", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not a gun?", "label": 0}, {"captions": ["a baby cries and a woman speaks", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["tMbMDvT50j8", "w34HjHr6gAY"], "start_seconds": ["12", "30"], "properties": ["a, cry, woman", "beeps, hit, woman"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a baby cries and a woman speaks", "a beep sounds followed by a child speaking"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a race car approaches quickly and slows down squealing tires", "wind blowing followed by a zoom"], "sample_ids": ["sEprKHm8Sj8", "vr8ZXjEBhMQ"], "start_seconds": ["90", "150"], "properties": ["car, tires, slows", "wind, blow, zoom"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 
2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "a door opens and closes"], "sample_ids": ["tOj4tdLRaA", "vBHyYJ8pL0"], "start_seconds": ["70", "2"], "properties": ["woman, laugh, baby", "open, close, door"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is more passive", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yZmhM1HcsyE", "vJ7JPEFhyLA"], "start_seconds": ["4", "16"], "properties": ["engine, roar, water", "three men, wind, flow"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a person screams glaringly", "a toilet flushes and water drains"], "sample_ids": ["xC8kbrKJmco", "sfAvvZwdLCY"], "start_seconds": ["0", "20"], "properties": ["glaringly, screams, person", "water drains, flushes, water"], "captions_pred_video": [null, "footage of the toilet in the bathroom"], "captions_pred_audio": ["a goat is bleating ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "a cat meows and children speak"], "sample_ids": 
["wqN6IIHw3po", "x5cuQjOdM3E"], "start_seconds": ["30", "30"], "properties": ["rain, surface, fall", "cat, speak, children"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a man is speaking and water is splashing", "a cat meows and a woman speaks"], "question": "which entity is a cat?", "label": 1}, {"captions": ["a woman speaks and dog vocalizes", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["uWAAAL4CIoc", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["a, dog, vocalize", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which animal is speaking", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wy1eKjR7KC0", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["people, talk, distance", "loud, jet engine, roar"], "captions_pred_video": ["two police officers riding motorcycles down the street", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking and a siren is going off", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a toilet flushes and water drains", "water is sprayed across a hard surface"], "sample_ids": ["sfAvvZwdLCY", "sQwlkXjQabo"], "start_seconds": ["20", "10"], "properties": ["water drains, flushes, water", "water, spray, surface"], "captions_pred_video": ["footage of the toilet in the bathroom", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a 
toilet is flushed", "spraying followed by silence"], "question": "which entity is a source of water", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a infant makes noise and is excited"], "sample_ids": ["wnpJndXuxLc", "wIJK3-5y0kA"], "start_seconds": ["50", "30"], "properties": ["blows, vehicle, train", "noise, excited, infant"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "plastic is tapped on while someone speaks"], "sample_ids": ["vzceMbklWc", "wvKpEYswXO0"], "start_seconds": ["180", "150"], "properties": ["water, faucet, sink", "plastic, tap, speak"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["water is running and a man is speaking", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["xBxDz0CFVn0", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["stream, water, flow", "loud, laughter, intermittent"], "captions_pred_video": ["footage is blurry and out of focus", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a stream of water flows as people talk and wind blows", "people cheer as a vehicle engine revs"], 
"sample_ids": ["xBxDz0CFVn0", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["stream, water, flow", "engine revs, vehicle, people"], "captions_pred_video": ["footage is blurry and out of focus", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "a man speaks as a motor runs in the background"], "sample_ids": ["sapQIQUhFc", "xZepNM9qcRA"], "start_seconds": ["280", "30"], "properties": ["water, trickles, flow", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "dishes cling together then a man begins to speak"], "sample_ids": ["tOSWIURC-4", "sQGXqGcwOTc"], "start_seconds": ["0", "3"], "properties": ["engine, work, nearby", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a lawn mower is running ", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["multiple ducks quack continuously", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wfHeoPDLMaM", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["multiple, quack, continuously", "loud, laughter, intermittent"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the 
barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["ducks are quacking", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["w0xsN8X18Y", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["music, surface, rain", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a man speaks as a motor runs in the background"], "sample_ids": ["vZAw4apG0Es", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["background, clock, ticktocks", "background, motor, run"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man speaks while a motorcycle revs and accelerates "], "question": "which 
entity has a clock in the background", "label": 0}, {"captions": ["a man sprays as a scraping occurs in the background", "a telephone rings followed by a woman talking"], "sample_ids": ["sOa7g-44Dag", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["background, man, spray", "ring, talk, woman"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", null], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "a woman speaks happily and an animal chirps"], "sample_ids": ["zsLxS-uLJTw", "uWAAAL4CIoc"], "start_seconds": ["20", "0"], "properties": ["horn, blast, train", "a woman, chirps, animal"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", null], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "a child speaks in closed space"], "sample_ids": ["viuTg1M-dqg", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["two men, speak, follow", "child, space, speak"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["zY3icUyMdh8", "s7knHCFW82w"], "start_seconds": ["20", 
"30"], "properties": ["dog, bark, engine", "blow horn, get close, train"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a train is blowing its horn and its wheels are squealing "], "question": "which entity is a train?", "label": 1}, {"captions": ["a cat meows and children speak", "several insects fly while two men talk"], "sample_ids": ["x5cuQjOdM3E", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["cat, speak, children", "several, fly, men"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["male speech with light ticking", "three men talk while wind blows and some liquid flows"], "sample_ids": ["xO-Q2BlIIPU", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["male, speech, ticking", "three men, wind, flow"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a frog croaks as other frogs croak in the background"], "sample_ids": ["u2f5NpsoHBg", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["person, laugh, clap", "background, frog, croak"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "a close up 
of a frog in the water"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a frog is croaking"], "question": "which entity is a frog", "label": 1}, {"captions": ["a male speaks over some small clicks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["uXxVebHsGZ8", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["male, clicks, speak", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "birds chirp and objects are moved around"], "sample_ids": ["y8dSeubCNI", "yPUYU6t3rwo"], "start_seconds": ["4", "370"], "properties": ["men, women, car", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["an engine revving and people talking in the background", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["leaves rustle while man speaks", "a car accelerates and wind blows"], "sample_ids": ["zOZleIRqZm4", "u0TrcHhkPQ"], "start_seconds": ["80", "20"], "properties": ["leaves, rustle, speak", "accelerates, wind, blows"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "motors rev and run loudly as a person laughs"], "sample_ids": ["x9JovgqUcs", "zl9Dqx-j7q4"], "start_seconds": ["500", "6"], "properties": ["a, man, 
speaks, keyboard", "motors rev, laugh, loudly"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a jet engine roars "], "question": "which entity is a person", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "a train engine runs and a horn blows"], "sample_ids": ["sapQIQUhFc", "zPX9o1uDiI"], "start_seconds": ["280", "40"], "properties": ["liquid, flow, distance", "engine, horn, run"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a train moves with its horn blowing and wheels squealing "], "question": "which entity is moving", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sQGXqGcwOTc", "zl9Dqx-j7q4"], "start_seconds": ["3", "6"], "properties": ["cling, speak, dishes", "engine, laugh, loud"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of a man driving a car in the dark"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "water is sprayed across a hard surface"], "sample_ids": ["tEE3MpBt1sg", "sQwlkXjQabo"], "start_seconds": ["50", "10"], "properties": ["drill, something, laugh", "water, spray, surface"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "a man speaks and a rooster crows while men talk in the 
background"], "sample_ids": ["yeFvk9x0wWI", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["chirp, twitter, clatter", "rooster, crow, background, men"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a rooster?", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "vehicles pass by on a roadway"], "sample_ids": ["vXlk0lIQBFo", "tgbONvsP47Y"], "start_seconds": ["470", "0"], "properties": ["wind, talk, vocalize", "pass, vehicle, roadway"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage of a fire truck entering a garage"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "people cheer as a vehicle engine revs"], "sample_ids": ["uZesmtKZGSw", "xjhAnI2q6hM"], "start_seconds": ["250", "6"], "properties": ["men, talk, cars", "engine revs, vehicle, people"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is 
speaking and a car is revving with laughter in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["birds chirp and a pop occurs before a man speaks", "a small musical boom and then birds tweet and a few dogs pant"], "sample_ids": ["zuua6-5goWw", "y2ZBGpgbhHM"], "start_seconds": ["30", "30"], "properties": ["sound, pop, bird", "birds, tweet, pant"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", null], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "birds chirping and a dog panting"], "question": "which entity has more birds", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a stream of water flows as people talk and wind blows"], "sample_ids": ["w8uLijTqtlU", "xBxDz0CFVn0"], "start_seconds": ["70", "30"], "properties": ["wind, microphone, noise", "stream, water, flow"], "captions_pred_video": ["footage is blurry and shaky", "footage is blurry and out of focus"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["s6DESzUTGjY", "yajyRTUQk3U"], "start_seconds": ["16", 
"400"], "properties": ["wind, laugh, woman", "a woman, something, fried"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a woman is speaking while food is frying in the background"], "question": "which woman is frying something?", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yRx9txMcBl0", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["motors, tires, screech", "a woman, laughs, animal"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "of a 
girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "an insect buzzes around continuously"], "sample_ids": ["spJCm8tD9Zo", "v25l1jef3JY"], "start_seconds": ["90", "0"], "properties": ["snores, wheezes, sleeps", "buzzes, continuously, insect"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a person is snoring loudly", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a woman speaks as she rubs two objects together"], "sample_ids": ["w5W5Kqtc8E", "vzxHnu-SFEw"], "start_seconds": ["100", "80"], "properties": ["water, splashes, motorboat", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a motorboat is moving and 
people are shouting and cheering ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["food is frying while a woman speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yhQ2Lg-7qDY", "sSMl2vc3ek"], "start_seconds": ["130", "20"], "properties": ["food, woman, speak", "loud, multiple, distance"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a person snoring loudly"], "question": "which entity is not a person?", "label": 0}, {"captions": ["wind blows and people talk while livestock vocalizes", "birds chirp and objects are moved around"], "sample_ids": ["vXlk0lIQBFo", "yPUYU6t3rwo"], "start_seconds": ["470", "370"], "properties": ["wind, talk, vocalize", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["t8CV69hcvF0", "wz7N8YRy74I"], "start_seconds": ["210", "30"], "properties": ["person, sneeze, follow", "rooster, crow, background, men"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["birds fly and flutter around", "a young woman speaks and laughs and an 
animal snorts"], "sample_ids": ["wGKgwOP3h30", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["fly, flutter, around", "a woman, laughs, animal"], "captions_pred_video": ["of the pigeons in the coop", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["pigeons coo and flap their wings", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 0}, {"captions": ["a girl talking, laughing and sneezing noise", "people cheer as a vehicle engine revs"], "sample_ids": ["y4tPJXBKDig", "xjhAnI2q6hM"], "start_seconds": ["20", "6"], "properties": ["a, noise, talk", "engine revs, vehicle, people"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a clock ticktocks"], "sample_ids": ["sAam2NqGhLY", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["snoring, breathing, child", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a little girl sleeping on a couch", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a person is snoring", "a clock is ticking loudly"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["a female speaks softly as paper crinkles", "water splashes as an animal walks through"], "sample_ids": ["xvDdE3zNf8Y", "w1ir-sZ3Im8"], "start_seconds": ["120", "90"], "properties": ["a, female, speaks", "animal, water, splashes"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman speaks and crumples paper", 
"water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a goat bleats as a person speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["tPJvjq9QePY", "tDVADusiIoc"], "start_seconds": ["40", "60"], "properties": ["bleats, person, speak", "water, radio, man"], "captions_pred_video": ["a dog and a sheep in a barn", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a baby cries and a man speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a person speaking over a radio?", "label": 1}, {"captions": ["a helicopter engine runs", "winds blows roughly as a vehicle races past"], "sample_ids": ["t5ZbXbniOWk", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["engine, helicopter, run", "wind, blows, vehicle"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a helicopter is flying overhead ", "a jet engine roars and wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "a woman speaks as frying food sizzles"], "sample_ids": ["v5P-ThUCINM", "wTideSjRFS0"], "start_seconds": ["400", "30"], "properties": ["background, chirp, bird", "food, sizzle, woman"], "captions_pred_video": [null, "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a woman is speaking while water is running in the background"], "question": "which entity is more likely to be in a restaurant", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sWZzXuWYY", "xBxDz0CFVn0"], "start_seconds": ["420", "30"], "properties": 
["male, speech, banging", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a speedboat passes quickly on the water"], "sample_ids": ["sLUnaPT5gM8", "tjmoSi330GM"], "start_seconds": ["0", "23"], "properties": ["loud, laughter, intermittent", "speed, water, boat"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a motorboat speeds through water with wind noise "], "question": "which is moving faster", "label": 0}, {"captions": ["insects humming with a dog barking and small goat bleating", "an airplane engine runs"], "sample_ids": ["tIY7qOV3rEM", "yVPZ2MNWpms"], "start_seconds": ["0", "0"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "engine, airplane, runs"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a car is driving by on the road "], "question": "which entity is a machine", "label": 1}, {"captions": ["a power tool runs and touches a surface", "a diesel truck engine runs continuously"], "sample_ids": ["zfvPRf3chY", "sZvwOuuPGP0"], "start_seconds": ["290", "50"], "properties": ["power tool, run, touch", "engine, diesel, truck"], "captions_pred_video": [null, "of a bulldozer clearing a road in a forest stock footage and royalty-free videos"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a medium engine is running "], "question": "which entity is a machine", "label": 1}, 
{"captions": ["a motorcycle engine works nearby", "an airplane engine runs"], "sample_ids": ["tOSWIURC-4", "yVPZ2MNWpms"], "start_seconds": ["0", "0"], "properties": ["engine, work, nearby", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a lawn mower is running ", "a car is driving by on the road "], "question": "which entity has a moving engine", "label": 1}, {"captions": ["children cry and people talk", "vehicles pass by on a roadway"], "sample_ids": ["xLwHe825Zs", "tgbONvsP47Y"], "start_seconds": ["18", "0"], "properties": ["people talk, children cry, people talk", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a baby cries and a woman speaks", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a machine runs continuously", "people cheer as a vehicle engine revs"], "sample_ids": ["wdXV3Pv0jiY", "xjhAnI2q6hM"], "start_seconds": ["11", "6"], "properties": ["machine, running, continuously", "engine revs, vehicle, people"], "captions_pred_video": ["footage is blurry and shaky", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a truck is revving its engine and a man is speaking "], "question": "which machine is running continuously", "label": 0}, {"captions": ["a saw finishes running as metal clings in the background", "a power tool runs and touches a surface"], "sample_ids": ["zofjfKhqLk8", "zfvPRf3chY"], "start_seconds": ["10", "290"], "properties": ["background, metal, clings", "power tool, run, touch"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking while a power tool is being used "], "question": "which tool 
is touching a surface", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "a person is burping then speaks and laughs"], "sample_ids": ["vs65y4qmyBE", "wAAkbZToh8"], "start_seconds": ["340", "0"], "properties": ["wind, blows, strongly", "burp, laugh, speak"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man burps and a woman speaks"], "question": "which entity is speaking", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "an airplane engine spools and people speak"], "sample_ids": ["yJ0TePmaOo", "wTjoRj1se3U"], "start_seconds": ["390", "390"], "properties": ["two hard objects, man, speak", "airplane, engine, spool"], "captions_pred_video": [null, "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a jet engine is running and people are talking"], "question": "which object is moving", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["sWZzXuWYY", "vzxHnu-SFEw"], "start_seconds": ["420", "80"], "properties": ["male, clanks, thumps", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a machine?", "label": 0}, {"captions": ["a crowd yells, reacts and applauds", "females talk and laugh over gusting wind"], "sample_ids": ["wztCSUxOf8", "un9VQlzgZM"], "start_seconds": ["130", "5"], "properties": ["a crowd, yells, applauds", "females, talk, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is more likely to be at a sporting event", "label": 0}, {"captions": ["a man speaks as horns blow", "a man speaks followed by another man speaking outside"], "sample_ids": ["tHyNqRyK34A", "viuTg1M-dqg"], "start_seconds": ["24", "30"], "properties": ["a, man, speaks", "two men, speak, follow"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a horn rings out as a machine runs by"], "sample_ids": ["vZAw4apG0Es", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["background, tick, repeat", "a, horn, run"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a train coming down the tracks on a sunny day"], 
"captions_pred_audio": ["a clock is ticking and people are talking", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity has a horn?", "label": 1}, {"captions": ["water bubbles and gurgles.", "a car speeding up in the distance"], "sample_ids": ["tB7hWb9gTuQ", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["bubbles, gurgles, water", "distance, car, speed"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", null], "captions_pred_audio": ["water is splashing and gurgling", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["zofjfKhqLk8", "ziUT9IFTkjg"], "start_seconds": ["10", "10"], "properties": ["background, metal, clank", "background, birds, rustling"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "birds are chirping and a chime is ringing "], "question": "which entity has a bird in the background?", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["wSVhSdj0F0", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["horn honks, keys jingle, slam", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity has a snort?", "label": 1}, {"captions": ["a duck quacks continuously", "television program is played far 
away while a woman talks and birds tweet nearby"], "sample_ids": ["vh30P49Po6s", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["quacks, continuously, duck", "a woman, a television program, a bird"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking and a dog is whimpering"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "waves crash against a shoreline and people speak"], "sample_ids": ["sfAvvZwdLCY", "yFB25fqfU8I"], "start_seconds": ["20", "300"], "properties": ["flushes, drains, water", "wave, crash, shoreline"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a source of water", "label": 0}, {"captions": ["a young woman speaks over spraying and another person yells", "a vehicle engine accelerating then running on idle"], "sample_ids": ["uYT5gxnyMWM", "vYkA3cfXp5Q"], "start_seconds": ["50", "30"], "properties": ["person, spray, yell", "engine, accelerate, idle"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["birds chirp and wind blows", "a woman speaks happily and an animal chirps"], "sample_ids": ["sxIvBMSavMQ", "uWAAAL4CIoc"], "start_seconds": ["210", "0"], "properties": ["birds, chirp, wind", "a woman, chirps, animal"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to 
extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["a goat screams and people speak in the background", "someone is typing on a computer keyboard"], "sample_ids": ["xC8kbrKJmco", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["background, goat, scream", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a goat is bleating ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a person whistles a meandering tune", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["uFoga8sHpiw", "zFjIWfSD-4"], "start_seconds": ["90", "410"], "properties": ["person, tune, whistle", "People, motor, brakes"], "captions_pred_video": ["footage of a bird in a cage", null], "captions_pred_audio": ["a person whistles a song", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vBslzh7saPw", "vYkA3cfXp5Q"], "start_seconds": ["90", "30"], "properties": ["power, scream, increase", 
"engine, accelerate, idle"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a jet engine roars and accelerates ", "an engine is idling"], "question": "which engine is running on idle", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["zl9Dqx-j7q4", "tDVADusiIoc"], "start_seconds": ["6", "60"], "properties": ["motors rev, laugh, loudly", "water, radio, man"], "captions_pred_video": ["footage of a man driving a car in the dark", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a jet engine roars ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a train horn sounds as it passes by", "a train horn blows as it passes by"], "sample_ids": ["ukg5L09Wpvo", "zVacuqSb4LI"], "start_seconds": ["150", "30"], "properties": ["sound, train, horn", "horn, blows, train"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a 
train blows its whistle and blows its horn ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which train horn blows as it passes by", "label": 1}, {"captions": ["birds chirp and wind blows", "an engine runs loudly"], "sample_ids": ["sxIvBMSavMQ", "vqZuVbG6-HI"], "start_seconds": ["210", "130"], "properties": ["birds, chirp, wind", "loud, engine, run"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "footage is blurry because it's raining outside"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a lawn mower is running and men are speaking "], "question": "which entity is quieter", "label": 0}, {"captions": ["a child speaks", "a duck quacks continuously"], "sample_ids": ["yW6FWLSLkx4", "vh30P49Po6s"], "start_seconds": ["40", "30"], "properties": ["a, child, speaks", "quacks, continuously, duck"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a duck is quacking loudly"], "question": "which entity is speaking", "label": 0}, {"captions": ["a motor noise is accompanied by a door opening and closing", "pigeons vocalize and birds 
chirp"], "sample_ids": ["vBHyYJ8pL0", "uiS58TNyUiw"], "start_seconds": ["2", "430"], "properties": ["noise, door, opening", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["zhx6hoYrHeI", "xfaoyyzw2WU"], "start_seconds": ["160", "180"], "properties": ["engine, sputter, rough", "loud, jet engine, roar"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a car accelerates and revs its engine ", "an aircraft engine roars and a man speaks "], "question": "which engine is louder", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["s6DESzUTGjY", "vfYTJq7nU"], "start_seconds": ["16", "130"], "properties": ["wind, laugh, woman", "rustling, ducks, quack"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an 
aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", null], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["some people speak", "a child speaks in closed space"], "sample_ids": ["vbZ-0lGPneg", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "child, space, speak"], "captions_pred_video": ["of a man holding a baby duck in his hands", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking english", "label": 0}, {"captions": ["people speak in the background as a clock ticktocks", "someone is typing on a computer keyboard"], "sample_ids": ["vZAw4apG0Es", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["background, clock, ticktocks", "keyboard, type, computer"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "how to make money on youtube in spanish"], "captions_pred_audio": ["a clock is ticking and people are talking", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "a infant makes noise and is excited"], "sample_ids": ["sofxkNWaP0s", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["wind, engine, louder", "noise, excited, infant"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", 
"a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vzceMbklWc", "uYT5gxnyMWM"], "start_seconds": ["180", "50"], "properties": ["water, faucet, sink", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["water is running and a man is speaking", "a woman is speaking and a baby is crying"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["u6jIvCtKarQ", "w5W5Kqtc8E"], "start_seconds": ["70", "100"], "properties": ["a, man, speaks", "wind, blow, vehicle"], "captions_pred_video": ["footage of a person using a blender on a stove top", null], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "water quietly rushes by while birds chirp in the background"], "sample_ids": ["ukg5L09Wpvo", "sYITalLZjj4"], "start_seconds": ["150", "30"], "properties": ["a train, a horn, a bell", "water, rushes, background, birds"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "two ducks are swimming in the water near each other"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "wind blows and birds chirp"], "question": "which entity is quieter", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sWZzXuWYY", "uYT5gxnyMWM"], "start_seconds": ["420", "50"], 
"properties": ["male, speech, banging", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a scream", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wRBHTgrbiwg", "sSMl2vc3ek"], "start_seconds": ["50", "20"], "properties": ["bird, owl, speak", "loud, multiple, distance"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["sWZzXuWYY", "wyllXV6PjKo"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "a baby, a woman, a man"], "captions_pred_video": [null, null], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman speaks and a baby cries"], "question": "which entity is more quiet", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sQGXqGcwOTc", "tdWhHV3X25Q"], "start_seconds": ["3", "60"], "properties": ["audio, kid, giggles", "applause, audience, yells"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "water pouring and 
bubbling"], "sample_ids": ["yDoT73BWsdA", "uyRfq-jKPpo"], "start_seconds": ["10", "50"], "properties": ["engine, revs, vehicle", "water, bubbles, pouring"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a cat meows and children speak", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["x5cuQjOdM3E", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["cat, speak, children", "men, talk, cars"], "captions_pred_video": ["a black background with an airplane flying in the sky", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 
1970 chevrolet"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a door opens and birds chirp", "an adult male speaks and dials a rotary phone"], "sample_ids": ["yeFvk9x0wWI", "tK4VlLsNxak"], "start_seconds": ["30", "120"], "properties": ["door, open, birds", "An adult male speaks, dials, and speaks into a rotary phone"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking and using a sewing machine"], "question": "which entity is a video", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "a man speaks as a car is passing by"], "sample_ids": ["s3cTDAj31g", "sK4u5T8hW78"], "start_seconds": ["80", "30"], "properties": ["man, talk, woman", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a man talking?", "label": 0}, {"captions": ["a woman and man speak while food is frying", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["zk-xJGQU8-4", "tDVADusiIoc"], "start_seconds": ["130", "60"], "properties": ["food, man, woman", "water, radio, man"], "captions_pred_video": ["a man and a woman cooking in 
a wok on the stove", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["su6FAOcOA8c", "w34HjHr6gAY"], "start_seconds": ["4", "30"], "properties": ["engine, run, woman", "beeps, hit, woman"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a woman and man are speaking", "a stream of water runs briefly"], "sample_ids": ["vbpKkWvfOu4", "x-PeY8Yb8M4"], "start_seconds": ["560", "300"], "properties": ["two people, speaking, woman, man", "stream, water, run"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking and 
a man is speaking", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a power tool runs and touches a surface", "plastic is tapped on while someone speaks"], "sample_ids": ["zfvPRf3chY", "wvKpEYswXO0"], "start_seconds": ["290", "150"], "properties": ["power tool, run, touch", "plastic, tap, speak"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a woman is speaking and tapping with background noise and water running "], "question": "which is not a power tool", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "winds blows roughly as a vehicle races past"], "sample_ids": ["tEE3MpBt1sg", "xjvTpk2Zpr8"], "start_seconds": ["50", "70"], "properties": ["drill, something, laugh", "wind, blows, vehicle"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["wy1eKjR7KC0", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["people, talk, distance", "motor noise, horn, siren"], "captions_pred_video": ["two police officers riding motorcycles down the street", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a car speeding up in the 
distance"], "sample_ids": ["v7jJS8aAyA", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["wind, blows, loudly", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a machine beeps continuously", "a duck quacks continuously"], "sample_ids": ["y682ml90jGw", "vh30P49Po6s"], "start_seconds": ["11", "30"], "properties": ["beeps, machine, continuously", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a beeping sound is being made ", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "some tunes played by whistling"], "sample_ids": ["wztCSUxOf8", "u6BnG6YZqJ4"], "start_seconds": ["130", "0"], "properties": ["a crowd, yells, applauds", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "a man speaks as a motor runs in the background"], "sample_ids": ["ylpYOorfH4o", "xZepNM9qcRA"], "start_seconds": ["410", "30"], "properties": ["engine, running, wind", "background, motor, run"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace 
the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a man speaks as a car is passing by"], "sample_ids": ["sK4u5T8hW78", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a, car, pass", "a, car, pass"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a photograph", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["un9VQlzgZM", "xfaoyyzw2WU"], "start_seconds": ["5", "180"], "properties": ["wind, 
speak, laugh", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sa6TLVbooCc", "wDVMhEdTiVw"], "start_seconds": ["240", "30"], "properties": ["people, laugh, child", "gun, shoot, water"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "a child speaks in closed space"], "sample_ids": ["u6jIvCtKarQ", "yW6FWLSLkx4"], "start_seconds": ["70", "40"], "properties": ["a, man, speaks", "child, space, speak"], "captions_pred_video": ["footage of a person using a blender on a stove top", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 0}, {"captions": ["speaking following by laughing and clapping", "insects humming with a dog barking and small goat bleating"], "sample_ids": ["u2f5NpsoHBg", "tIY7qOV3rEM"], "start_seconds": ["30", "0"], "properties": ["person, laugh, clap", "animal, bark, dog, barking, small, goat, bleating"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "a dog is standing in the middle of a dirt road in the woods"], 
"captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a dog is barking and a cat is meowing"], "question": "which animal is barking", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["xyL9F5VrjkE", "y8WEcpOlT3I"], "start_seconds": ["20", "40"], "properties": ["engine, run, wind", "harsh, wind, blows"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "on how to use a sewing machine youtube"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking with wind noise in the background "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["goats bleat and people speak", "an infant crying as a woman laughs"], "sample_ids": ["z5iUE5h0EPs", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["goats bleat, people speak, language", "a, laugh, infant"], "captions_pred_video": ["of the goat in the barn", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a goat bleats and a man speaks", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "some men converse over an engine running"], "sample_ids": ["uiItxDsDMFI", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["wood, piece, saw", "men, converse, engine"], "captions_pred_video": ["a man cutting a log with an axe in the woods", null], "captions_pred_audio": ["a saw is being used with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a piece of wood being sawed?", "label": 0}, {"captions": ["here comes the train and it starts to blow the horn and get close", "birds chirp and objects are moved around"], "sample_ids": ["s7knHCFW82w", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["blow horn, get close, 
train", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "insects buzz and a man speaks"], "question": "which entity is moving around objects", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "a propeller rotates loudly and intensely"], "sample_ids": ["wSVhSdj0F0", "ugHJF0hfYkg"], "start_seconds": ["10", "10"], "properties": ["horn honks, keys jingle, slam", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a duck quacks loudly and continuously"], "sample_ids": ["xBxDz0CFVn0", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["wind, chatter, people", "loud, continuous, quacks"], "captions_pred_video": ["footage is blurry and out of focus", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 0}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "dishes cling together then a man begins to speak"], "sample_ids": ["w2JXXIAdUdg", "sQGXqGcwOTc"], "start_seconds": ["10", "3"], "properties": ["snoring, distance, person", "cling, speak, dishes"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a person snoring and a dog 
whimpering", "mechanisms are operating and water is splashing "], "question": "which entity has a person speaking in the distance?", "label": 0}, {"captions": ["a toilet flushes and a female speaks", "a woman speaks and a baby laughs"], "sample_ids": ["yaln9y8I7ms", "tOj4tdLRaA"], "start_seconds": ["230", "70"], "properties": ["female, flushes, toilet", "woman, laugh, baby"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and a man speaks", "a baby laughs and a woman speaks"], "question": "which entity has a baby laugh?", "label": 1}, {"captions": ["a machine runs continuously", "dogs bark as an engine runs and a person whistles"], "sample_ids": ["wdXV3Pv0jiY", "zY3icUyMdh8"], "start_seconds": ["11", "20"], "properties": ["machine, running, continuously", "dog, bark, engine"], "captions_pred_video": ["footage is blurry and shaky", "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a car is driving and dogs are barking and squealing "], "question": "which entity is not running continuously", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "dishes cling together then a man begins to speak"], "sample_ids": ["sncRqQ67iJU", "sQGXqGcwOTc"], "start_seconds": ["460", "3"], "properties": ["loud, repeatedly, man", "cling, speak, dishes"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a person is snoring", "mechanisms are operating and water is splashing "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "an infant crying frantically"], "sample_ids": ["vhJWZheqaE", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["water drains unevenly, toilet flushes, water drains", "cry, infant, frantically"], "captions_pred_video": 
[null, "of the baby crying in the car seat"], "captions_pred_audio": ["a toilet is flushed", "a baby cries loudly"], "question": "which entity is a human", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "people cheer as a vehicle engine revs"], "sample_ids": ["wtDqrBygTcU", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["man, engine, run", "engine revs, vehicle, people"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and a motor is running", "a truck is revving its engine and a man is speaking "], "question": "which entity has a man speaking as an engine runs?", "label": 0}, {"captions": ["a diesel truck engine runs while wind blows", "a woman speaks as she rubs two objects together"], "sample_ids": ["xyL9F5VrjkE", "vzxHnu-SFEw"], "start_seconds": ["20", "80"], "properties": ["engine, run, wind", "two objects, woman, speak"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how 
to"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["an engine runs and a man speaks", "vehicles pass by on a roadway"], "sample_ids": ["yT5WfYMRr-U", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["engine, run, man", "pass, vehicle, roadway"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "birds chirp and objects are moved around"], "sample_ids": ["uPDn2BFTHk", "yPUYU6t3rwo"], "start_seconds": ["140", "370"], "properties": ["lady, laugh, baby", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a baby laughs and a woman speaks", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["vz8868znkVQ", "ziUT9IFTkjg"], "start_seconds": ["60", "10"], "properties": ["audio, click, kid speaking", "background, birds, rustling"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", null], "captions_pred_audio": ["a baby is laughing and breathing with background noise ", "birds are chirping and a chime is ringing "], "question": "which entity has a bird in the background?", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "a vehicle is skidding and squealing tires"], "sample_ids": ["wztCSUxOf8", "soTOh3zYJfY"], 
"start_seconds": ["130", "40"], "properties": ["a crowd, yells, applauds", "vehicle, skid, tires"], "captions_pred_video": [null, "a red car drifting on a winding road with smoke coming out of it"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a helicopter engine idles continuously", "an airplane engine spools and people speak"], "sample_ids": ["ugHJF0hfYkg", "wTjoRj1se3U"], "start_seconds": ["10", "390"], "properties": ["engine, idle, continuously", "airplane, engine, spool"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a helicopter is flying overhead ", "a jet engine is running and people are talking"], "question": "which entity has a moving engine", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vf9xf3vMsGM", "xBxDz0CFVn0"], "start_seconds": ["540", "30"], "properties": ["A man speaks while turning a water faucet on.", "stream, water, flow"], "captions_pred_video": ["of the person washing their hands under the faucet", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["an audience gives applause", "water flows as a woman laughs and a man speaks"], "sample_ids": ["x6iCUDmRpKQ", "vddP56-ogds"], "start_seconds": ["38", "30"], "properties": ["applause, audience, give", "water, flow, laugh"], "captions_pred_video": ["a black background with the moon and stars in the sky", null], "captions_pred_audio": ["a group of people are clapping and cheering", "water is running and gurgling and a man 
is speaking"], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["w0xsN8X18Y", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["music, surface, rain", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["running water in a faucet with some clinks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["zNRChLjqcU", "uZesmtKZGSw"], "start_seconds": ["220", "250"], "properties": ["water, faucet, run", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks as insects buzz and a bird chirps", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["t25U-v4k4ts", "sLUnaPT5gM8"], "start_seconds": ["40", "0"], "properties": ["a, chirps, bird", "loud, laughter, intermittent"], "captions_pred_video": ["of 
a beekeeper working on a beehive in the woods", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an adult woman and an adult man speak", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["zTLVJCo4WEE", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["two people, adult, speak", "airplane, boy, fly"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about a boy speaking?", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a vehicle accelerates and squeals tires"], "sample_ids": ["tK4VlLsNxak", "yRx9txMcBl0"], "start_seconds": ["120", "40"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "accelerates, tires, squeals"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a car is revving its engine and skidding "], 
"question": "which entity is a vehicle", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "water pouring and bubbling"], "sample_ids": ["vs65y4qmyBE", "uyRfq-jKPpo"], "start_seconds": ["340", "50"], "properties": ["engine, run, man", "water, bubbles, pouring"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wfHeoPDLMaM", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["quacking, squawking, ducks", "engine, accelerate, idle"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with 
a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["ducks are quacking", "an engine is idling"], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a man speaks, then dials a rotary telephone", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tK4VlLsNxak", "vb1fPSDI4c"], "start_seconds": ["120", "30"], "properties": ["a, dial, telephone", "multiple, people, yell"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["yRx9txMcBl0", "tdWhHV3X25Q"], "start_seconds": ["40", "60"], "properties": ["accelerates, tires, squeals", "applause, audience, yells"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a 
man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a woman and man speak while food is frying", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zk-xJGQU8-4", "vb1fPSDI4c"], "start_seconds": ["130", "30"], "properties": ["food, man, woman", "multiple, people, yell"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", null], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "a horn rings out as a machine runs by"], "sample_ids": ["wudZTNBtVqc", "slZLHwNbbt4"], "start_seconds": ["60", "300"], "properties": ["accelerates, engine, wind", "a, horn, run"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a man talks while metallic objects are rapped and steam is released"], "sample_ids": ["sSMl2vc3ek", "vuUVPzd2FXw"], "start_seconds": ["20", "160"], "properties": ["a person, laughs, snores", "a, steam, release"], "captions_pred_video": [null, "of the person cooking on the grill with a spatula"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and dishes are clanging"], "question": "which entity is about a person releasing steam?", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "someone 
is typing on a computer keyboard"], "sample_ids": ["tDVADusiIoc", "v0x1odnXtP0"], "start_seconds": ["60", "210"], "properties": ["water, radio, man", "keyboard, type, computer"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a person is typing on a keyboard"], "question": "which entity is a person", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a infant makes noise and is excited"], "sample_ids": ["ul60S8TXDA8", "wIJK3-5y0kA"], "start_seconds": ["60", "30"], "properties": ["sound, distance, bell", "noise, excited, infant"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "an airplane engine spools and people speak"], "sample_ids": ["vJ7JPEFhyLA", "wTjoRj1se3U"], "start_seconds": ["16", "390"], "properties": ["three men, wind, flow", "airplane, engine, spool"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a jet engine is running and people are talking"], "question": "which entity is about a moving object", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "an insect buzzes around continuously"], "sample_ids": ["yRx9txMcBl0", "v25l1jef3JY"], "start_seconds": ["40", "0"], "properties": ["accelerates, tires, squeals", "buzzes, continuously, insect"], "captions_pred_video": ["in 10 words or less the video is about a 
red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a fly is buzzing around a microphone "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["birds chirp as a bell rings", "birds chirp and objects are moved around"], "sample_ids": ["ziUT9IFTkjg", "yPUYU6t3rwo"], "start_seconds": ["10", "370"], "properties": ["chirp, bell, ring", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["y8dSeubCNI", "sLUnaPT5gM8"], "start_seconds": ["4", "0"], "properties": ["engine revving, people speaking, motorcycle", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["an engine revving and people talking in the background", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["running water in a faucet with some clinks", "an airplane engine spools and people speak"], "sample_ids": ["zNRChLjqcU", 
"wTjoRj1se3U"], "start_seconds": ["220", "390"], "properties": ["water, faucet, run", "airplane, engine, spool"], "captions_pred_video": [null, "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["water is running from a faucet into a sink", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["a duck quacks several times", "a series of light horn beeps is followed by a loud steam whistle"], "sample_ids": ["vh30P49Po6s", "wnpJndXuxLc"], "start_seconds": ["30", "50"], "properties": ["quacks, duck, several", "beeps, loud, whistle"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a duck is quacking loudly", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "vehicles pass by on a roadway"], "sample_ids": ["tDVADusiIoc", "tgbONvsP47Y"], "start_seconds": ["60", "0"], "properties": ["man, radio, blows", "pass, vehicle, roadway"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "a child speaks"], "sample_ids": ["vddP56-ogds", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["liquid, laughs, man", "a, child, speaks"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a woman is speaking with background noise and breathing sounds 
"], "question": "which entity is a person", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["s7knHCFW82w", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["blow horn, get close, train", "a woman, laughs, animal"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "dishes cling together then a man begins to speak"], "sample_ids": ["v0x1odnXtP0", "sQGXqGcwOTc"], "start_seconds": ["210", "3"], "properties": ["keyboard, type, computer", "cling, speak, dishes"], "captions_pred_video": ["how to make money on youtube in spanish", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a person is typing on a keyboard", "mechanisms are operating and water is splashing "], "question": "which entity is about speaking", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a vehicle is skidding and squealing tires"], "sample_ids": ["vXlk0lIQBFo", "soTOh3zYJfY"], "start_seconds": ["470", "40"], "properties": ["wind, speak, vocalize", "vehicle, skid, tires"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a red car drifting on a winding road with smoke coming out of it"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "wind blows as people chatter quietly"], "sample_ids": ["zF8yoL0rkbI", "xBxDz0CFVn0"], "start_seconds": 
["30", "30"], "properties": ["engine, run, someone", "wind, chatter, people"], "captions_pred_video": ["footage of the traffic on the street at night", "footage is blurry and out of focus"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["an insect buzzes around continuously", "a train horn blows as it passes by"], "sample_ids": ["v25l1jef3JY", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["buzzes, continuously, insect", "horn, blows, train"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vzxHnu-SFEw", "tDVADusiIoc"], "start_seconds": ["80", "60"], "properties": ["two objects, woman, speak", "water, radio, man"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "a man speaks as a car is passing by"], "sample_ids": ["xOZfdgAgJ9o", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["woman, whimpering, speaking", "a, car, pass"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with background noise and 
breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a person snores loudly multiple times at a close distance"], "sample_ids": ["xBxDz0CFVn0", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["wind, chatter, people", "loud, multiple, distance"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a person is whistling", "water pouring and bubbling"], "sample_ids": ["sIXTftIuUgw", "uyRfq-jKPpo"], "start_seconds": ["90", "50"], "properties": ["person, whistling, person", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a person whistling a song", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks while water drains", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vSeGhaZt-aI", "xfaoyyzw2WU"], "start_seconds": ["50", "180"], "properties": ["water, drain, man", "loud, jet engine, roar"], "captions_pred_video": ["a man in a kitchen preparing 
a smoothie with a blender", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a small engine spits as it runs", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sZvwOuuPGP0", "wDVMhEdTiVw"], "start_seconds": ["50", "30"], "properties": ["spits, engine, runs", "gun, shoot, water"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a medium engine is running ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not a gun?", "label": 0}, {"captions": ["wind blows and people scream while an engine revs", "a train horn blares as a train passes, then fades"], "sample_ids": ["w5W5Kqtc8E", "zVacuqSb4LI"], "start_seconds": ["100", "30"], "properties": ["wind, engine, scream", "blares, fades, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a train whistle blows and a 
train passes by with a whistle blowing "], "question": "which entity is about a train?", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["w5W5Kqtc8E", "uEU-Hg5MTN8"], "start_seconds": ["100", "27"], "properties": ["wind, blow, vehicle", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking and a baby is crying"], "question": "which entity is about a woman laughing?", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "several insects fly while two men talk"], "sample_ids": ["sjlVMgdGSK0", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["accelerates, vehicle, race car", "several, fly, men"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a moving object", "label": 0}, {"captions": ["water bubbles and gurgles.", "a door opens and closes"], "sample_ids": ["tB7hWb9gTuQ", "vBHyYJ8pL0"], "start_seconds": ["30", "2"], "properties": ["bubbles, gurgles, water", "open, close, door"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", null], "captions_pred_audio": ["water is splashing and gurgling", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["birds vocalize and a man speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["v0wPrLBI3hg", "zl9Dqx-j7q4"], "start_seconds": 
["30", "6"], "properties": ["vocalize, bird, speak", "engine, laugh, loud"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "footage of a man driving a car in the dark"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "an infant crying as a woman laughs"], "sample_ids": ["yajyRTUQk3U", "xhmRY9yhC7c"], "start_seconds": ["400", "20"], "properties": ["a woman, something, fried", "a, laugh, infant"], "captions_pred_video": ["- a woman cooking in the kitchen", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a baby cries and a woman speaks"], "question": "which woman is laughing", "label": 1}, {"captions": ["an airplane engine spools and people speak", "water flows as men speak and yell"], "sample_ids": ["wTjoRj1se3U", "vJ7JPEFhyLA"], "start_seconds": ["390", "16"], "properties": ["airplane, engine, spool", "water, flow, men"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a jet engine is running and people are talking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a moving object?", "label": 0}, {"captions": ["a helicopter engine runs continuously", "a man speaks followed by another man speaking outside"], "sample_ids": ["ugHJF0hfYkg", "viuTg1M-dqg"], "start_seconds": ["10", "30"], "properties": ["engine, running, continuously", "two men, speak, follow"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking with background noise and breathing 
sounds "], "question": "which entity is a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "dishes cling together then a man begins to speak"], "sample_ids": ["sOa7g-44Dag", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["audio, scratching, man", "cling, speak, dishes"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a heavy rain falls endlessly", "a man speaks as a car is passing by"], "sample_ids": ["wP8ZKrlx3oA", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["heavy, rain, fall", "a, car, pass"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a chime of a clock followed by various tones of ticking with come clinking"], "sample_ids": ["ugHJF0hfYkg", "uqFtmnhuqA8"], "start_seconds": ["10", "30"], "properties": ["loud, intense, propeller", "a, b, c"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", 
"shows a clock on the wall of a room with a person standing in front of it"], "captions_pred_audio": ["a helicopter is flying overhead ", "mechanisms are ticking and a hammer is striking "], "question": "which entity is quieter", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["w0xsN8X18Y", "tDlysoZiA1I"], "start_seconds": ["30", "0"], "properties": ["rain, thunder, surface", "animal, grunts, chirps"], "captions_pred_video": [null, "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yZrFNS7GFBQ", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["pigeon, buzzes, insect", "rooster, crow, background, men"], "captions_pred_video": ["of the bird in the cage", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a bird?", "label": 0}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["t69a8aRKhmc", "t25U-v4k4ts"], "start_seconds": ["30", "40"], "properties": ["a, b, c", "a, chirps, bird"], "captions_pred_video": ["footage is blurry and out of focus", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking and bees are buzzing"], "question": "which entity has a bird chirp?", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "water flows 
and trickles"], "sample_ids": ["zCrAfDfv6-A", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["person, mouse, click", "water, flow, trickle"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a person whistles a song", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["yDoT73BWsdA", "tdWhHV3X25Q"], "start_seconds": ["10", "60"], "properties": ["engine revs, tires squeal, vehicle", "applause, audience, yells"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a child yells and another yells", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vMDHu7Lxcgw", "su6FAOcOA8c"], "start_seconds": ["410", "4"], "properties": ["two, yell, child", "engine, idle, woman"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "a car accelerates and wind blows"], "sample_ids": ["wP8ZKrlx3oA", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["rain, storm, thunder", "accelerates, wind, blows"], "captions_pred_video": ["footage of a 
flooded street in the middle of a desert with mountains in the background", null], "captions_pred_audio": ["a heavy rain is falling on a surface", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["an insect buzzes around continuously", "a machine beeps continuously"], "sample_ids": ["v25l1jef3JY", "y682ml90jGw"], "start_seconds": ["0", "11"], "properties": ["buzzes, continuously, insect", "beeps, machine, continuously"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a beeping sound is being made "], "question": "which entity is a machine", "label": 1}, {"captions": ["a train horn blows as it passes by", "a vehicle engine accelerating then running on idle"], "sample_ids": ["zVacuqSb4LI", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["horn, blows, train", "engine, accelerate, idle"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, 
{"captions": ["a loud engine muffles a man as he speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xyx6eNVEYRY", "zj2R0XoFr5k"], "start_seconds": ["380", "50"], "properties": ["loud, engine, muffles", "airplane, boy, fly"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a baby cries and a woman speaks", "vehicles pass by on a roadway"], "sample_ids": ["tMbMDvT50j8", "tgbONvsP47Y"], "start_seconds": ["12", "0"], "properties": ["a, cry, woman", "pass, vehicle, roadway"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a baby cries and a woman speaks", "a car is driving on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["yswmmRZFItk", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["background, frog, croak", "male, duck, laugh"], "captions_pred_video": ["a close up of a frog in the water", null], "captions_pred_audio": ["a frog is croaking", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "several insects fly while two men talk"], "sample_ids": ["zkKdxzNC97Y", "s-T9OVOiMLo"], "start_seconds": ["27", "330"], "properties": ["hard, surface, door", "several, fly, men"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "a man climbing up 
a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a person snoring several times", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["spJCm8tD9Zo", "uZesmtKZGSw"], "start_seconds": ["90", "250"], "properties": ["snore, person, several", "men, talk, cars"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["birds chirp and a pop occurs before a man speaks", "vehicles pass by on a roadway"], "sample_ids": ["zuua6-5goWw", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["sound, pop, bird", "pass, vehicle, roadway"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a stream of water runs briefly"], "sample_ids": ["zY3icUyMdh8", "x-PeY8Yb8M4"], "start_seconds": ["20", "300"], "properties": ["dog, bark, engine", "stream, water, run"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["ugHJF0hfYkg", "zFjIWfSD-4"], "start_seconds": ["10", "410"], "properties": ["loud, intense, propeller", "People, motor, brakes"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which is quieter", "label": 1}, {"captions": ["someone whistles a song", "several insects fly while two men talk"], "sample_ids": ["sIXTftIuUgw", "s-T9OVOiMLo"], "start_seconds": ["90", "330"], "properties": ["someone, song, whistle", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a person whistling a song", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a 
person", "label": 0}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "a man speaks as a motor runs in the background"], "sample_ids": ["wP8ZKrlx3oA", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["rain, storm, thunder", "background, motor, run"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "waves crash against a shoreline and people speak"], "sample_ids": ["vlJS7LN2XyM", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["background, clocks, ticking", "wave, crash, shoreline"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["zkKdxzNC97Y", "uZesmtKZGSw"], "start_seconds": ["27", "250"], "properties": ["hard, surface, door", "men, talk, cars"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["goats bleat and metal clings", "a clock ticktocks"], "sample_ids": ["tH17JPjDPnc", "v-g-j2uTByM"], "start_seconds": ["260", "30"], "properties": ["bleat, metal, clings", "ticktocks, clock, ticktocks"], "captions_pred_video": ["feed of the goats eating hay in the barn", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["y2ZBGpgbhHM", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["animal, growl, bird", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds chirping and a dog panting", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["birds chirp as a bell rings", "some men converse over an engine running"], "sample_ids": ["ziUT9IFTkjg", "sCiy7QS1U"], "start_seconds": ["10", "300"], "properties": ["chirp, bell, ring", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a human activity", "label": 1}, {"captions": ["repeated tapping is accompanied by water running 
and a woman speaking softly", "water flows as men speak and yell"], "sample_ids": ["wvKpEYswXO0", "vJ7JPEFhyLA"], "start_seconds": ["150", "16"], "properties": ["sound, water, running", "water, flow, men"], "captions_pred_video": ["of the person preparing food in the kitchen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more water flowing", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "a toilet flushes and a female speaks"], "sample_ids": ["vYkA3cfXp5Q", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["engine, accelerate, idle", "female, flushes, toilet"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage is blurry and out of focus"], "captions_pred_audio": ["an engine is idling", "a toilet flushes and a man speaks"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "a woman speaks with water running"], "sample_ids": ["sQwlkXjQabo", "wTideSjRFS0"], "start_seconds": ["10", "30"], "properties": ["liquid, surface, spray", "water, running, woman"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["spraying followed by silence", "a woman is speaking while water is running in the background"], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["bees buzz and wind blows", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tMJne1a4AFI", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["bees buzz, wind blows, bees", "a woman, something, fried"], "captions_pred_video": ["a swarm of bees on the ground", 
"- a woman cooking in the kitchen"], "captions_pred_audio": ["a swarm of bees buzzing around", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["birds vocalize and a man speaks", "a clock ticktocks"], "sample_ids": ["v0wPrLBI3hg", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["vocalize, bird, speak", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yks4cLgIDMc", "wz7N8YRy74I"], "start_seconds": ["170", "30"], "properties": ["background, speaking, child", "rooster, crow, background, men"], "captions_pred_video": ["footage of two kids wrestling on the floor", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and a child is crying", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a duck quacks continuously"], "sample_ids": ["tDVADusiIoc", "vh30P49Po6s"], "start_seconds": ["60", "30"], "properties": ["wind, radio, waves", "quacks, continuously, duck"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, 
{"captions": ["a person whistles and clicks a mouse", "a few ducks quack and scamper and a man speaks"], "sample_ids": ["zCrAfDfv6-A", "w2bYrCVLT60"], "start_seconds": ["30", "120"], "properties": ["person, mouse, click", "ducks, speak, quack"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "of the ducks drinking from a pink pool in the grass"], "captions_pred_audio": ["a person whistles a song", "ducks are quacking and a man is speaking"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a person snoring several times", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["spJCm8tD9Zo", "zFjIWfSD-4"], "start_seconds": ["90", "410"], "properties": ["snore, person, several", "People, motor, brakes"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 0}, {"captions": ["a person is burping then speaks and laughs", "dishes cling together then a man begins to speak"], "sample_ids": ["wAAkbZToh8", "sQGXqGcwOTc"], "start_seconds": ["0", "3"], "properties": ["burp, laugh, speak", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man burps and a woman speaks", "mechanisms are operating and water is splashing "], "question": "which entity is about speaking", "label": 1}, {"captions": ["a duck quacks several times", "a car speeding up in the distance"], "sample_ids": ["vh30P49Po6s", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["quacks, duck, several", "distance, car, speed"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a race car accelerates and revs its engine "], "question": 
"which entity is moving faster", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "water splashes as an animal walks through"], "sample_ids": ["zgUgkpk78xU", "w1ir-sZ3Im8"], "start_seconds": ["70", "90"], "properties": ["horn, bells, ring", "animal, water, splashes"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "a car accelerates and wind blows"], "sample_ids": ["wjsXBsc7M40", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "accelerates, wind, blows"], "captions_pred_video": ["footage of the baby playing with a toothbrush", null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "an insect buzzes around continuously"], "sample_ids": ["zuua6-5goWw", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["birds, chirp, quiet, man, speaks", "buzzes, continuously, insect"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a fly is buzzing around a microphone "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a horn rings out as a machine runs by"], "sample_ids": ["vBHyYJ8pL0", "slZLHwNbbt4"], "start_seconds": ["2", "300"], "properties": ["noise, door, opening", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is accompanied by a door opening and closing", "label": 0}, {"captions": ["some tunes played by whistling", "a toilet flushes and a female speaks"], "sample_ids": ["u6BnG6YZqJ4", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["tune, play, whistling", "female, flushes, toilet"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "footage is blurry and out of focus"], "captions_pred_audio": ["a person whistling a song", "a toilet flushes and a man speaks"], "question": "which entity is a video", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "water is sprayed across a hard surface"], "sample_ids": ["sofxkNWaP0s", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["wind, engine, louder", "water, spray, surface"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign 
in the foreground", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a child speaks in closed space"], "sample_ids": ["sLUnaPT5gM8", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["loud, laughter, intermittent", "child, space, speak"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a child yells and another yells", "a horn honks and then loudly blares"], "sample_ids": ["vMDHu7Lxcgw", "wnpJndXuxLc"], "start_seconds": ["410", "50"], "properties": ["two, yell, child", "horn, honk, loud"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a toilet door squeaks as it is opened", "a horse runs while two women talk"], "sample_ids": ["sdXV-ylviw", "sdvI1mHAsc"], "start_seconds": ["190", "20"], "properties": ["door, toilet, squeaks", "two women, horse, run"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dog barks and taps with background noise ", "horses clip-clop and a woman speaks"], "question": "which entity is a living thing", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "some 
men converse over an engine running"], "sample_ids": ["sQGXqGcwOTc", "sCiy7QS1U"], "start_seconds": ["3", "300"], "properties": ["audio, kid, giggles", "men, converse, engine"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", null], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video", "label": 1}, {"captions": ["a man speaks as a machine runs", "a man speaks as a machine runs"], "sample_ids": ["vD6lYD1l0BY", "vD6lYD1l0BY"], "start_seconds": ["330", "330"], "properties": ["a, machine, run", "a, machine, run"], "captions_pred_video": ["game controller being held in the hands of the person", "game controller being held in the hands of the person"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking and dishes are being washed "], "question": "which machine is running in the first image?", "label": 0}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vddP56-ogds", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["water, splash, person, laugh", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a piece of wood is being placed down and sawed"], "sample_ids": ["tQWGZLItBXk", "uiItxDsDMFI"], "start_seconds": ["170", "30"], "properties": ["voice, music, whoosh", "wood, piece, saw"], "captions_pred_video": ["worms revolution screenshots", "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects 
play ", "a saw is being used with background noise "], "question": "which entity is a video of a person sawing wood?", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "paper is crumpling consistently"], "sample_ids": ["uJV8NDaHqqk", "v5cSxLaHADY"], "start_seconds": ["100", "0"], "properties": ["loud, fly, chirp", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a bee hive in a wooden box", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a swarm of bees buzzing around", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "wind blows as people chatter quietly"], "sample_ids": ["sncRqQ67iJU", "xBxDz0CFVn0"], "start_seconds": ["460", "30"], "properties": ["loud, repeatedly, man", "wind, chatter, people"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "footage is blurry and out of focus"], "captions_pred_audio": ["a person is snoring", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "an airplane engine spools and people speak"], "sample_ids": ["vbpKkWvfOu4", "wTjoRj1se3U"], "start_seconds": ["560", "390"], "properties": ["a, woman, man", "airplane, engine, spool"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a jet engine is running and people are talking"], "question": "which entity is a video of a woman speaking and other women and a man talk with her?", "label": 0}, 
{"captions": ["birds chirp and objects are moved around", "a woman speaks happily and an animal chirps"], "sample_ids": ["yPUYU6t3rwo", "uWAAAL4CIoc"], "start_seconds": ["370", "0"], "properties": ["birds chirp, objects are moved around, birds", "a woman, chirps, animal"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", null], "captions_pred_audio": ["insects buzz and a man speaks", "a woman is speaking and a dog is barking "], "question": "which entity is a video", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["xyL9F5VrjkE", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["wind, blows, vehicle", "female, spraying, scream"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman and man speak while food is frying", "an infant crying frantically"], "sample_ids": ["zk-xJGQU8-4", "zwOBqeFTgiU"], "start_seconds": ["130", "30"], "properties": ["food, man, woman", "cry, infant, frantically"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "of the baby crying in the car seat"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["tjmoSi330GM", "wz7N8YRy74I"], "start_seconds": ["23", "30"], "properties": ["speed, water, boat", "rooster, crow, background, men"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the 
background", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a rooster?", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "an airplane flies overhead as a woman speaks"], "sample_ids": ["ul60S8TXDA8", "zj2R0XoFr5k"], "start_seconds": ["60", "50"], "properties": ["sound, distance, bell", "airplane, fly, overhead"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying overhead", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "a man speaks as a car is passing by"], "sample_ids": ["yZp6xizR0yU", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["animal, bleat, cry", "a, car, pass"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["people clap and speak in the distance", "someone snores nearby"], "sample_ids": ["wwyfGO2J4", "spJCm8tD9Zo"], "start_seconds": ["90", 
"90"], "properties": ["clap, distance, speak", "someone snores, nearby, someone"], "captions_pred_video": [null, "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a person is snoring loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["people speak softly as food sizzles", "sirens ring and approach with humming of distant traffic"], "sample_ids": ["yhQ2Lg-7qDY", "xERFUeZONz8"], "start_seconds": ["130", "0"], "properties": ["food, sizzle, speak", "ring, approach, traffic"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage is blurry due to camera shake or motion blur"], "captions_pred_audio": ["a faucet is running and a man is speaking", "an emergency vehicle siren blares"], "question": "which entity is more quiet", "label": 0}, {"captions": ["people cheer as a vehicle engine revs", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["xjhAnI2q6hM", "tdWhHV3X25Q"], "start_seconds": ["6", "60"], "properties": ["engine revs, vehicle, people", "applause, audience, yells"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "water splashes as an animal walks through"], "sample_ids": ["wPz6QRAkEb4", "w1ir-sZ3Im8"], "start_seconds": ["60", "90"], "properties": ["chirps, tweets, song", "animal, water, splashes"], "captions_pred_video": ["a bird in a cage on top of a pole", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["birds are chirping in the background ", "water splashes and gurgles as people 
speak"], "question": "which entity is not a bird?", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "pigeons vocalize and birds chirp"], "sample_ids": ["wSVhSdj0F0", "uiS58TNyUiw"], "start_seconds": ["10", "430"], "properties": ["horn honks, keys jingle, slam", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a car accelerates and wind blows"], "sample_ids": ["wfHeoPDLMaM", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["quacking, squawking, ducks", "accelerates, wind, blows"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a toilet door squeaks as it is opened", "a airplane flies overhead as a woman speaks"], "sample_ids": ["sdXV-ylviw", "zj2R0XoFr5k"], 
"start_seconds": ["190", "50"], "properties": ["door, toilet, squeaks", "airplane, fly, woman"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is moving", "label": 1}, {"captions": ["a duck quacks continuously", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vh30P49Po6s", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["quacks, continuously, duck", "male, duck, laugh"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking and ducks are quacking"], "question": "which duck is speaking", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["sZPuqDgX2V0", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["commentator, race, track", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a race car accelerates and revs its engine "], "question": "which vehicle is racing around a track", "label": 0}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wz7N8YRy74I", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["rooster, crow, background, people", "loud, laughter, intermittent"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a baby 
is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a motorcycle engine works nearby", "water flows as men speak and yell"], "sample_ids": ["tOSWIURC-4", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["engine, work, nearby", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a lawn mower is running ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["a person is snoring while sleeping", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vJrjSeP17yE", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["a person is sleeping, snoring, person", "female, spraying, scream"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 0}, {"captions": ["a propeller moves loudly nearby", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["ugHJF0hfYkg", "yajyRTUQk3U"], "start_seconds": ["10", "400"], "properties": ["loud, propeller, move", "a woman, something, fried"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks followed by clicks and scraping", "paper is crumpling consistently"], "sample_ids": ["yYJksgsxx5U", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["audio, clicks, scraping", "paper is crumpling, paper is white, paper 
is crumpling"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["speaking following by laughing and clapping", "an infant crying as a woman laughs"], "sample_ids": ["u2f5NpsoHBg", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["person, laugh, clap", "a, laugh, infant"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a baby cries and a woman speaks"], "question": "which person is laughing", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "water splashes as an animal walks through"], "sample_ids": ["zFjIWfSD-4", "w1ir-sZ3Im8"], "start_seconds": ["410", "90"], "properties": ["People, motor, brakes", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "an infant crying frantically"], "sample_ids": ["tDlfY3nmx1A", "zwOBqeFTgiU"], "start_seconds": ["160", "30"], "properties": ["applause, laugh, man", "cry, infant, frantically"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "of the baby crying in the car seat"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["goats bleat and people speak", 
"an insect buzzes around continuously"], "sample_ids": ["z5iUE5h0EPs", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["goats bleat, people speak, language", "buzzes, continuously, insect"], "captions_pred_video": ["of the goat in the barn", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a goat bleats and a man speaks", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["a child speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["yW6FWLSLkx4", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["a, child, speaks", "engine revs, vehicle, people"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["water flows followed by women screaming", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["w5W5Kqtc8E", "vlS6YMeWAPo"], "start_seconds": ["100", "40"], "properties": ["water, flow, women", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a goat bleats and birds chirp"], "question": "which entity is followed by birds chirping", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "an airplane engine spools and people speak"], "sample_ids": ["zofjfKhqLk8", "wTjoRj1se3U"], "start_seconds": ["10", "390"], "properties": ["noise, stop, motor", "airplane, engine, spool"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of a man playing with a remote 
control airplane in a field"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["people speak as gunfire rings out", "a woman talks while a baby cries and a man whispers"], "sample_ids": ["wqTCwqVRDlk", "smDKStoHBJo"], "start_seconds": ["80", "0"], "properties": ["gunfire, ring, speak", "a, talk, baby, cry"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "a man holding a crying baby in his arms"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a baby is crying and a woman is speaking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "several insects fly while two men talk"], "sample_ids": ["sofxkNWaP0s", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["wind, engine, louder", "several, fly, men"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more flying insects", "label": 1}, {"captions": ["a jet engine spools up and takes off", "a train horn blows as it passes by"], "sample_ids": ["vBslzh7saPw", "zVacuqSb4LI"], "start_seconds": ["90", "30"], "properties": ["engine, spools, takes", "horn, blows, train"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by 
mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train?", "label": 1}, {"captions": ["an adult woman and an adult man speak", "water pouring and bubbling"], "sample_ids": ["zTLVJCo4WEE", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["two people, adult, speak", "water, bubbles, pouring"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman speaks and crickets chirp", "water is running from a faucet"], "question": "which entity is more likely to be in a bathroom", "label": 1}, {"captions": ["a person speaks over rustling leaves", "a bus engine idles while a woman speaks making an announcement"], 
"sample_ids": ["zOZleIRqZm4", "su6FAOcOA8c"], "start_seconds": ["80", "4"], "properties": ["rustling, leaves, person", "engine, idle, woman"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a subway train is moving "], "question": "which entity is a person speaking over?", "label": 0}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "a car accelerates and wind blows"], "sample_ids": ["yI-KvObbDoY", "u0TrcHhkPQ"], "start_seconds": ["260", "20"], "properties": ["sound, smack, wind", "accelerates, wind, blows"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", null], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["people speak and tapping occurs", "a propeller rotates loudly and intensely"], "sample_ids": ["tFCUUGdREgA", "ugHJF0hfYkg"], "start_seconds": ["70", "10"], "properties": ["people, tap, speak", "loud, intense, propeller"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a man is filing a hard object", "an airplane engine spools and people speak"], "sample_ids": ["vveS8HT7Uog", "wTjoRj1se3U"], "start_seconds": ["100", "390"], "properties": ["a man, hard, object", "airplane, engine, spool"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of a man playing with a remote control 
airplane in a field"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a jet engine is running and people are talking"], "question": "which object is moving", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "heavy rain splashes as it falls"], "sample_ids": ["vdoxuJn9lTc", "wP8ZKrlx3oA"], "start_seconds": ["40", "40"], "properties": ["burp, loud, girl", "fall, rain, splash"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a child speaks followed by a burp", "a heavy rain is falling on a surface"], "question": "which entity is more likely to cause a splash", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a man speaks, another man speaks, and a small bell dings"], "sample_ids": ["yajyRTUQk3U", "t69a8aRKhmc"], "start_seconds": ["400", "30"], "properties": ["a woman, something, fried", "a, b, c"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking and birds are chirping in the background "], "question": "which entity has a woman talking?", "label": 0}, {"captions": ["water running down a sink while a man is talking", "a clock ticktocks"], "sample_ids": ["vSeGhaZt-aI", "v-g-j2uTByM"], "start_seconds": ["50", "30"], "properties": ["water, sink, talk", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a bus engine idles while 
a woman speaks making an announcement", "motors runs briefly and tires screech"], "sample_ids": ["su6FAOcOA8c", "yRx9txMcBl0"], "start_seconds": ["4", "40"], "properties": ["engine, idle, woman", "motors, tires, screech"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a car is revving its engine and skidding "], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a train horn sounds as a railroad passing bell rings", "someone whistles a tune"], "sample_ids": ["zgUgkpk78xU", "sIXTftIuUgw"], "start_seconds": ["70", "90"], "properties": ["horn, bell, train", "someone, tune, whistle"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a person whistling a song"], "question": "which entity is a human", "label": 1}, {"captions": ["vehicles pass by on a roadway", "an airplane engine spools and people speak"], "sample_ids": ["tgbONvsP47Y", "wTjoRj1se3U"], "start_seconds": ["0", "390"], "properties": 
["pass, vehicle, roadway", "airplane, engine, spool"], "captions_pred_video": ["footage of a fire truck entering a garage", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a car is driving on the road ", "a jet engine is running and people are talking"], "question": "which is not a moving object", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "water splashes as an animal walks through"], "sample_ids": ["uWAAAL4CIoc", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["a woman, chirps, animal", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "water splashes and gurgles as people speak"], "question": "which animal is more active", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["w5W5Kqtc8E", "wDVMhEdTiVw"], "start_seconds": ["100", "30"], "properties": ["wind, blow, vehicle", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["dogs barking and whimpering", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tIY7qOV3rEM", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["barking, whimpering, dog", "two men, woman, birds"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, 
{"captions": ["a woman speaks as she rubs two objects together", "a stream of water runs briefly"], "sample_ids": ["vzxHnu-SFEw", "x-PeY8Yb8M4"], "start_seconds": ["80", "300"], "properties": ["two objects, woman, speak", "stream, water, run"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a weapon fires multiple times", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sMC07Ucy7kg", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["weapon, fire, multiple", "a, scream, girl"], "captions_pred_video": ["footage is from a car's point of view", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks and a baby laughs", 
"a person snores loudly multiple times at a close distance"], "sample_ids": ["tOj4tdLRaA", "sSMl2vc3ek"], "start_seconds": ["70", "20"], "properties": ["woman, laugh, baby", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["vbZ-0lGPneg", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["a woman, a television program, a bird", "sheep, baa, birds"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a goat bleats and birds chirp"], "question": "which entity has more birds", "label": 1}, {"captions": ["a power tool runs and touches a surface", "several ducks quack and cocks crow far away"], "sample_ids": ["zfvPRf3chY", "sNB8zxXneIM"], "start_seconds": ["290", "20"], "properties": ["power tool, run, touch", "several, quack, cocks"], "captions_pred_video": [null, "a group of geese in a cage"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a rooster is crowing and wind is blowing "], "question": "which is not a power tool", "label": 1}, {"captions": ["birds chirp and wind blows", "a man speaks followed by another man speaking outside"], "sample_ids": ["sxIvBMSavMQ", "viuTg1M-dqg"], "start_seconds": ["210", "30"], "properties": ["birds, chirp, wind", "two men, speak, follow"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive 
beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a human speaking?", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "roadway noise occurs and a truck accelerates"], "sample_ids": ["u--KhUW8l1Y", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["engine, sound, horn", "noise, truck, accelerate"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "a telephone rings followed by a woman talking"], "sample_ids": ["w2JXXIAdUdg", "tGcFnX0GHI"], "start_seconds": ["10", "0"], "properties": ["snoring, distance, person", "ring, talk, woman"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", null], "captions_pred_audio": ["a person snoring and a dog whimpering", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a heavy rain falls endlessly", "a toilet flushes and a female speaks"], "sample_ids": ["wP8ZKrlx3oA", "yaln9y8I7ms"], "start_seconds": ["40", "230"], "properties": 
["heavy, rain, fall", "female, flushes, toilet"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage is blurry and out of focus"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a toilet flushes and a man speaks"], "question": "which entity is not a video of a toilet flushing?", "label": 0}, {"captions": ["ticking continues without interruption", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["v-g-j2uTByM", "xV7Mg1QucSc"], "start_seconds": ["30", "14"], "properties": ["ticking, continuous, clock", "alarm, ticktocks, laughs"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a clock is ticking loudly", "an alarm clock ticks and a woman laughs"], "question": "which clock is ticking continuously", "label": 0}, {"captions": ["a young woman speaks over spraying and another person yells", "a man speaks as a motor runs in the background"], "sample_ids": ["uYT5gxnyMWM", "xZepNM9qcRA"], "start_seconds": ["50", "30"], "properties": ["person, spray, yell", "background, motor, run"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a machine beeps continuously"], "sample_ids": ["yRx9txMcBl0", "y682ml90jGw"], "start_seconds": ["40", "11"], "properties": ["accelerates, tires, squeals", "beeps, machine, continuously"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 
mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["a machine runs continuously", "water flows and trickles"], "sample_ids": ["wdXV3Pv0jiY", "tB7hWb9gTuQ"], "start_seconds": ["11", "30"], "properties": ["machine, running, continuously", "water, flow, trickle"], "captions_pred_video": ["footage is blurry and shaky", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "water is splashing and gurgling"], "question": "which entity is not a continuous flow", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["zALy31PjDl0", "zj2R0XoFr5k"], "start_seconds": ["21", "50"], "properties": ["a man, a vehicle, a horn", "airplane, boy, fly"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about a vehicle?", "label": 0}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": 
["vfYTJq7nU", "xKB8O8LTs6s"], "start_seconds": ["130", "70"], "properties": ["rustling, ducks, quack", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a duck quacks and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "a stream of water flows as people talk and wind blows"], "sample_ids": ["slZLHwNbbt4", "xBxDz0CFVn0"], "start_seconds": ["300", "30"], "properties": ["clap, distance, horn", "stream, water, flow"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage is blurry and out of focus"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking with wind noise in the background "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a horn rings out as a machine runs by"], "sample_ids": ["sK4u5T8hW78", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["a, car, pass", "a, horn, run"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a 
child babbles as a woman speaks", "water pouring and bubbling"], "sample_ids": ["wEBlkGWVWwE", "uyRfq-jKPpo"], "start_seconds": ["260", "50"], "properties": ["a, babble, woman", "water, bubbles, pouring"], "captions_pred_video": ["shows a person writing on the whiteboard", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "a train engine runs and a horn blows"], "sample_ids": ["w1mlz3Pe4fU", "zPX9o1uDiI"], "start_seconds": ["300", "40"], "properties": ["vocalize, chirp, continuously", "engine, horn, run"], "captions_pred_video": ["of a bird in a cage", null], "captions_pred_audio": ["birds are chirping and singing", "a train moves with its horn blowing and wheels squealing "], "question": "which entity is not a train?", "label": 0}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "people applaud and hoot and chat quietly"], "sample_ids": ["ukg5L09Wpvo", "wwyfGO2J4"], "start_seconds": ["150", "90"], "properties": ["clickety-clack, train, whistle", "people, applaud, hoot"], 
"captions_pred_video": ["footage of a train passing through a forest on a dirt road", null], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "a car speeding up in the distance"], "sample_ids": ["wqN6IIHw3po", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["rain, surface, fall", "distance, car, speed"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", null], "captions_pred_audio": ["a man is speaking and water is splashing", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "water pouring and bubbling"], "sample_ids": ["wqZ135Ssz0", "uyRfq-jKPpo"], "start_seconds": ["60", "50"], "properties": ["two men, woman, birds", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "water is running from a faucet"], "question": "which entity is 
more likely to be in a bathroom", "label": 1}, {"captions": ["a child speaks", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yW6FWLSLkx4", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["a, child, speaks", "female, spraying, scream"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity is a child speaking?", "label": 0}, {"captions": ["distant men speak as a spray can nozzle is depressed", "people speak as gunfire rings out"], "sample_ids": ["rwtmaKiCcQU", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["nozzle, depressed, spray can", "gunfire, ring, speak"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["spraying and people speaking", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a telephone rings followed by a woman talking"], "sample_ids": ["t25U-v4k4ts", "tGcFnX0GHI"], "start_seconds": ["40", "0"], "properties": ["a, chirps, bird", "ring, talk, woman"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", null], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a human talking?", "label": 1}, {"captions": ["a beep repeats multiple times", "wind blowing followed by a zoom"], "sample_ids": ["y682ml90jGw", "vr8ZXjEBhMQ"], "start_seconds": ["11", "150"], "properties": ["beep, repeat, multiple", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a 
motorcycle's point of view"], "captions_pred_audio": ["a beeping sound is being made ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "paper is crumpling consistently"], "sample_ids": ["x6ijhqRY38s", "v5cSxLaHADY"], "start_seconds": ["250", "0"], "properties": ["bowl, silverware, man", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a person is snoring while sleeping"], "sample_ids": ["zuua6-5goWw", "vJrjSeP17yE"], "start_seconds": ["30", "40"], "properties": ["birds, chirp, quiet, man, speaks", "a person is sleeping, snoring, person"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a person snoring loudly"], "question": "which entity is quieter", "label": 0}, {"captions": ["small dogs yip and bark sharply", "a telephone rings followed by a woman 
talking"], "sample_ids": ["v-wcQf4BDY0", "tGcFnX0GHI"], "start_seconds": ["120", "0"], "properties": ["bark, yip, sharply", "ring, talk, woman"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", null], "captions_pred_audio": ["a dog barks and growls", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "a man speaks as a car is passing by"], "sample_ids": ["wSVhSdj0F0", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["horn honks, keys jingle, electronic beep", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a recording of a car passing by?", "label": 1}, {"captions": ["food is frying while a woman speaks", "dogs barking and whimpering"], "sample_ids": ["yhQ2Lg-7qDY", "tIY7qOV3rEM"], "start_seconds": ["130", "0"], "properties": ["food, woman, speak", "barking, whimpering, dog"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a dog is standing in the middle of a dirt road in the woods"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a dog is barking and a cat is meowing"], "question": "which entity is a dog", "label": 1}, {"captions": ["a car speeding up in the distance", "an engine sputters followed by a car zooming 
by"], "sample_ids": ["u0TrcHhkPQ", "u5RmF3c3Aw"], "start_seconds": ["20", "60"], "properties": ["distance, car, speed", "engine, car, zoom"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a race car accelerates and skids with wind noise in the background "], "question": "which car is zooming by", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "waves crash against a shoreline and people speak"], "sample_ids": ["wqN6IIHw3po", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["rain, surface, fall", "wave, crash, shoreline"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking and water is splashing", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be seen in a movie", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "a car accelerates and wind blows"], "sample_ids": ["yFB25fqfU8I", "u0TrcHhkPQ"], "start_seconds": ["300", "20"], "properties": ["wave, crash, shoreline", "accelerates, wind, blows"], "captions_pred_video": ["footage of a person surfing in the ocean", null], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["heavy rain splashes as it falls", "an engine sputters followed by a car zooming by"], "sample_ids": ["wP8ZKrlx3oA", "u5RmF3c3Aw"], "start_seconds": ["40", "60"], "properties": ["fall, rain, splash", "engine, car, zoom"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", null], "captions_pred_audio": ["a heavy rain is falling on a surface", "a race car accelerates and skids with 
wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a infant makes noise and is excited"], "sample_ids": ["uZesmtKZGSw", "wIJK3-5y0kA"], "start_seconds": ["250", "30"], "properties": ["men, talk, cars", "noise, excited, infant"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "people applaud and hoot and chat quietly"], "sample_ids": ["vZAw4apG0Es", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["background, clock, ticktocks", "people, applaud, hoot"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["several insects fly while two men talk", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["s-T9OVOiMLo", "tDVADusiIoc"], "start_seconds": ["330", "60"], "properties": ["several, fly, men", "water, radio, man"], "captions_pred_video": ["a man climbing up a tree 
using a rope to reach the top of the tree", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more water", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a male speaks and another male speaks"], "sample_ids": ["tDVADusiIoc", "viuTg1M-dqg"], "start_seconds": ["60", "30"], "properties": ["water, radio, man", "two males, speaking, male"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a crowd yells, reacts and applauds"], "sample_ids": ["xC8kbrKJmco", "wztCSUxOf8"], "start_seconds": ["0", "130"], "properties": ["background, goat, scream", "a crowd, yells, applauds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a goat is bleating ", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["w5W5Kqtc8E", "ziUT9IFTkjg"], "start_seconds": ["100", "10"], "properties": ["wind, engine, scream", "background, birds, rustling"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "a loud snarling 
engine is followed by a man laughing"], "sample_ids": ["tGcFnX0GHI", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["ring, talk, woman", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["zj2R0XoFr5k", "s7knHCFW82w"], "start_seconds": ["50", "30"], "properties": ["airplane, fly, overhead", "blow horn, get close, train"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a train is blowing its horn and its wheels are squealing "], "question": "which is a train", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "water drips and bubbles as a man speaks"], "sample_ids": ["yVumC9TGknc", "vSeGhaZt-aI"], "start_seconds": ["30", "50"], "properties": ["humming, clock, birds", "water, bubbles, speak"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a series of beeps and chirps", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["an engine starts and increases in power", "a man talks followed by a woman shouting"], "sample_ids": ["zjTG0gaGCUI", "s3cTDAj31g"], "start_seconds": ["80", "80"], "properties": ["power, increase, engine", "man, talk, woman"], "captions_pred_video": [null, null], 
"captions_pred_audio": ["a jet engine roars as wind blows ", "a man is speaking and a baby is crying"], "question": "which entity is a human", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "people speak as gunfire rings out"], "sample_ids": ["sapQIQUhFc", "wqTCwqVRDlk"], "start_seconds": ["280", "80"], "properties": ["liquid, flow, distance", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a duck quacks continuously", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vh30P49Po6s", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["quacks, continuously, duck", "airplane, boy, fly"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a duck is quacking loudly", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "pigeons vocalize and birds chirp"], "sample_ids": ["xyL9F5VrjkE", "uiS58TNyUiw"], "start_seconds": ["20", "430"], "properties": ["wind, blows, vehicle", "vocalize, bird, chirp"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "of the pigeon in the cage"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a toilet door squeaks as it is opened", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["sdXV-ylviw", 
"ukg5L09Wpvo"], "start_seconds": ["190", "150"], "properties": ["door, toilet, squeaks", "a train, a horn, a bell"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a train blows its whistle and blows its horn "], "question": "which entity is a train", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "wind blowing followed by a zoom"], "sample_ids": ["zcDwZ6W7E3E", "vr8ZXjEBhMQ"], "start_seconds": ["180", "150"], "properties": ["man, speak, motorcycles", "wind, blow, zoom"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to be a video of a man speaking?", "label": 0}, {"captions": ["a woman speaks and then a man speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["vbpKkWvfOu4", "sQGXqGcwOTc"], "start_seconds": ["560", "3"], "properties": ["a, man, speaks", "cling, speak, dishes"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "mechanisms are operating and water is splashing "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["distant humming of an engine", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["yVPZ2MNWpms", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["sound, distance, engine", "a, 
scream, girl"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car is driving by on the road ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["x6ijhqRY38s", "wqZ135Ssz0"], "start_seconds": ["250", "60"], "properties": ["bowl, silverware, man", "two men, woman, birds"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vhJWZheqaE", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["water drains unevenly, toilet flushes, water drains", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a person talking?", "label": 1}, {"captions": ["vehicles pass by on a roadway", "a toilet flushes and a female speaks"], "sample_ids": ["tgbONvsP47Y", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["pass, vehicle, roadway", "female, flushes, toilet"], "captions_pred_video": ["footage of a fire truck entering a garage", "footage is blurry and out of focus"], "captions_pred_audio": ["a car is driving on the road ", "a toilet flushes and a man speaks"], "question": "which entity is a video", "label": 1}, {"captions": ["a door slams shut roughly", "small dogs yip 
and bark sharply"], "sample_ids": ["zkKdxzNC97Y", "v-wcQf4BDY0"], "start_seconds": ["27", "120"], "properties": ["a door, slams, shut", "bark, yip, sharply"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a door is opened and closed", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["a drill runs and two people laugh", "a vehicles accelerate quickly and someone laughs"], "sample_ids": ["tEE3MpBt1sg", "uWPRNLnpy7Y"], "start_seconds": ["50", "10"], "properties": ["two people, laugh, drill", "accelerate, laugh, vehicle"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "is taken from a car driving down the street"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["humming and rattling of an engine idling as it revs", "a person snores loudly multiple times at a close distance"], "sample_ids": ["xMXvkIcaG0Y", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["sound, humming, rattling", "loud, multiple, distance"], "captions_pred_video": ["footage of a car's hood being opened up to reveal the engine underneath the hood", null], "captions_pred_audio": ["an engine is revving and accelerating ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "multiple birds vocalize and wind blows"], "sample_ids": ["wvKpEYswXO0", "uoGVs9yUqY4"], "start_seconds": ["150", "30"], "properties": ["plastic, tap, speak", "multiple, vocalize, wind"], "captions_pred_video": ["of the person preparing food in the kitchen", "for how to make a wooden shed door youtube"], "captions_pred_audio": ["a woman is speaking and tapping with 
background noise and water running ", "birds are chirping and flapping their wings with wind noise in the background "], "question": "which entity is not a person", "label": 1}, {"captions": ["leaves rustle while man speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["zOZleIRqZm4", "vzxHnu-SFEw"], "start_seconds": ["80", "80"], "properties": ["leaves, rustle, speak", "two objects, woman, speak"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["a motorcycle engine is revving while people are speaking", "a woman speaks and then a man speaks"], "sample_ids": ["y8dSeubCNI", "vbpKkWvfOu4"], "start_seconds": ["4", "560"], "properties": ["engine revving, people speaking, motorcycle", "a, man, speaks"], "captions_pred_video": [null, "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["an engine revving and people talking in the background", "a woman is speaking and a man is speaking"], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "a muffled toilet flushes and the water drains"], "sample_ids": ["w1mlz3Pe4fU", "sfAvvZwdLCY"], "start_seconds": ["300", "20"], "properties": ["vocalize, chirp, continuously", "flushes, drains, water"], "captions_pred_video": ["of a bird in a cage", "footage of the toilet in the bathroom"], "captions_pred_audio": ["birds are chirping and singing", "a toilet is flushed"], "question": "which entity is silent", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "an audience gives applause"], "sample_ids": ["vhJWZheqaE", "x6iCUDmRpKQ"], "start_seconds": ["0", "38"], "properties": ["water drains unevenly, toilet flushes, water drains", "applause, audience, give"], "captions_pred_video": [null, "a black background with the moon and stars in the sky"], "captions_pred_audio": ["a toilet is flushed", "a group of people are clapping and cheering"], "question": "which entity is a response to a stimulus", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "pigeons vocalize and birds chirp"], "sample_ids": ["vZAw4apG0Es", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["people, clock, converse", "vocalize, bird, chirp"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "of the pigeon in the cage"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["water flows and trickles", "a bus engine idles while a woman speaks making an 
announcement"], "sample_ids": ["tB7hWb9gTuQ", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["water, flow, trickle", "engine, idle, woman"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["water is splashing and gurgling", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["uOpoD0gGXcs", "vlS6YMeWAPo"], "start_seconds": ["120", "40"], "properties": ["chirps, woman, bird", "sheep, baa, birds"], "captions_pred_video": ["a herd of cows grazing in the field", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a goat bleats and birds chirp"], "question": "which entity is a response to a human chirping?", "label": 0}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "someone snores nearby"], "sample_ids": ["su6FAOcOA8c", "spJCm8tD9Zo"], "start_seconds": ["4", "90"], "properties": ["engine, idle, woman", "someone snores, nearby, someone"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["people speak then an engine runs", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["uMTTDZ2mb4", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["engine, run, people", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet 
holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wAAkbZToh8", "sLUnaPT5gM8"], "start_seconds": ["0", "0"], "properties": ["burp, laugh, speak", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man burps and a woman speaks", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more like a snore", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["vlJS7LN2XyM", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["background, clocks, ticking", "motor noise, horn, siren"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a ticktock of a clock", "a truck is honking its horn and a siren is blaring "], "question": "which entity is more ominous", "label": 1}, {"captions": ["leaves rustle while man speaks", "music plays and 
someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zOZleIRqZm4", "xKB8O8LTs6s"], "start_seconds": ["80", "70"], "properties": ["leaves, rustle, speak", "music, gunfire, explosion"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["a machine runs continuously", "wind blows as people chatter quietly"], "sample_ids": ["wdXV3Pv0jiY", "xBxDz0CFVn0"], "start_seconds": ["11", "30"], "properties": ["machine, running, continuously", "wind, chatter, people"], "captions_pred_video": ["footage is blurry and shaky", "footage is blurry and out of focus"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sYITalLZjj4", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["water, rushes, background, birds", "People, motor, brakes"], "captions_pred_video": ["two ducks are swimming in the water near each other", null], "captions_pred_audio": ["wind blows and birds chirp", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is more quiet", "label": 0}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["xl2PIWyXaM", "w5W5Kqtc8E"], "start_seconds": ["160", "100"], "properties": ["chirp, man, younger person", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and people are 
talking", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running?", "label": 1}, {"captions": ["a person is whistling", "birds chirp quietly and an adult man speaks"], "sample_ids": ["sIXTftIuUgw", "zuua6-5goWw"], "start_seconds": ["90", "30"], "properties": ["person, whistling, person", "birds, chirp, quiet, man, speaks"], "captions_pred_video": [null, "in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot"], "captions_pred_audio": ["a person whistling a song", "birds are chirping and a man is speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a woman speaks happily and an animal chirps"], "sample_ids": ["siJFXfGWgDk", "uWAAAL4CIoc"], "start_seconds": ["50", "0"], "properties": ["man, woman, vehicle", "a woman, chirps, animal"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", null], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a woman is speaking and a dog is barking "], "question": "which entity has a more active animal", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zdYdyF9-m8U", "wz7N8YRy74I"], "start_seconds": ["7", "30"], "properties": ["wind, crash, 
shoreline", "rooster, crow, background, men"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["waves crash and wind blows ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a man speaks as a machine runs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vD6lYD1l0BY", "tDVADusiIoc"], "start_seconds": ["330", "60"], "properties": ["a, machine, run", "water, radio, man"], "captions_pred_video": ["game controller being held in the hands of the person", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["people speak then an engine runs", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["uMTTDZ2mb4", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["engine, run, people", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "dishes cling together then a man begins to speak"], "sample_ids": ["wqZ135Ssz0", "sQGXqGcwOTc"], "start_seconds": ["60", "3"], "properties": ["man, woman, squawks", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "mechanisms are operating and water is splashing "], 
"question": "which entity has a man speaking", "label": 1}, {"captions": ["an insect buzzes around continuously", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["v25l1jef3JY", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["buzzes, continuously, insect", "rooster, crow, background, men"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a bird?", "label": 1}, {"captions": ["small dogs yip and bark sharply", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["v-wcQf4BDY0", "xKB8O8LTs6s"], "start_seconds": ["120", "70"], "properties": ["bark, yip, sharply", "music, gunfire, explosion"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a dog barks and growls", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "several insects fly while two men talk"], "sample_ids": ["rwtmaKiCcQU", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["nozzle, depressed, spray can", "several, fly, men"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["spraying and people speaking", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more insects", "label": 1}, {"captions": ["a man speaks uses a drill", "propeller rearing loudly with some male and female voices interspersed in the 
background"], "sample_ids": ["x5eIC7S0fbg", "vqZuVbG6-HI"], "start_seconds": ["60", "130"], "properties": ["A man is speaking, uses a drill, and is a tool", "background, male, female"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and using a power tool ", "a lawn mower is running and men are speaking "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tDlfY3nmx1A", "uZesmtKZGSw"], "start_seconds": ["160", "250"], "properties": ["applause, laugh, man", "men, talk, cars"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "a man speaks as horns blow"], "sample_ids": ["yswmmRZFItk", "tHyNqRyK34A"], "start_seconds": ["0", "24"], "properties": ["background, frog, croak", "a, man, speaks"], "captions_pred_video": ["a close up of a frog in the water", "being taken from inside a 
vehicle on the street at night"], "captions_pred_audio": ["a frog is croaking", "a man is speaking and a car is honking with background noise "], "question": "which entity is a human speaking?", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "a baby coos and fidgets as a lady speaks and laughs"], "sample_ids": ["voJh2gJxXhA", "uPDn2BFTHk"], "start_seconds": ["50", "140"], "properties": ["music, frog, croak", "lady, laugh, baby"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", null], "captions_pred_audio": ["music is playing and crickets are chirping ", "a baby laughs and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "an adult woman and an adult man speak"], "sample_ids": ["sofxkNWaP0s", "zTLVJCo4WEE"], "start_seconds": ["30", "30"], "properties": ["wind, engine, louder", "two people, adult, speak"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "- a boy with a rifle aiming at a target"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a woman speaks and crickets chirp"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a clock ticktocks briefly", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["u7C-AEBQM", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks briefly", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and ducks are quacking"], "question": "which entity is a clock?", "label": 0}, {"captions": ["a door slams shut and an object moves on a hard surface", "people applaud and hoot and chat quietly"], "sample_ids": ["zkKdxzNC97Y", "wwyfGO2J4"], "start_seconds": ["27", "90"], 
"properties": ["hard, surface, door", "people, applaud, hoot"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be in a theater", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sWZzXuWYY", "uYT5gxnyMWM"], "start_seconds": ["420", "50"], "properties": ["male, speech, banging", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a woman", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "a stream of water flows as people talk and wind blows"], "sample_ids": ["uiS58TNyUiw", "xBxDz0CFVn0"], "start_seconds": ["430", "30"], "properties": ["vocalize, bird, chirp", "stream, water, flow"], "captions_pred_video": ["of the pigeon in the cage", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a man is speaking with wind noise in the background "], "question": "which entity is not a stream of water flowing?", "label": 0}, {"captions": ["a saw finishes running as metal clings in the background", "water runs into a sink while men speak"], "sample_ids": ["zofjfKhqLk8", "vzceMbklWc"], "start_seconds": ["10", "180"], "properties": ["background, metal, clings", "water, sink, run"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "water is running and a man is speaking"], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a car speeding up in the 
distance", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["u0TrcHhkPQ", "yajyRTUQk3U"], "start_seconds": ["20", "400"], "properties": ["distance, car, speed", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["an airplane engine runs", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["yVPZ2MNWpms", "yDoT73BWsdA"], "start_seconds": ["0", "10"], "properties": ["engine, airplane, runs", "engine, revs, vehicle"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a car is driving by on the road ", "a race car accelerates and revs its engine "], "question": "which entity has a running engine", "label": 0}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a car speeding up in the distance"], "sample_ids": ["yLy-WycbVVE", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["background, people, talk", "distance, car, speed"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", null], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a duck quacks continuously"], "sample_ids": ["siJFXfGWgDk", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["man, woman, vehicle", "quacks, continuously, duck"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "of a man brushing his teeth with a toothbrush in his mouth"], 
"captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "waves crash against a shoreline and people speak"], "sample_ids": ["ylpYOorfH4o", "yFB25fqfU8I"], "start_seconds": ["410", "300"], "properties": ["motor, run, steady", "wave, crash, shoreline"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which is a natural phenomenon", "label": 1}, {"captions": ["a goat bleats as a person speaks", "a young woman speaks over spraying and another person yells"], "sample_ids": ["tPJvjq9QePY", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["bleats, person, speak", "person, spray, yell"], "captions_pred_video": ["a dog and a sheep in a barn", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a baby cries and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["the revving of an engine throttle followed by a man speaking", "a vehicle engine revs as the vehicle passes"], 
"sample_ids": ["tezvROoo4bs", "yDoT73BWsdA"], "start_seconds": ["40", "10"], "properties": ["audio, throttle, speaking", "engine, revs, vehicle"], "captions_pred_video": ["footage of a busy city street with cars parked on both sides of the road", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "a race car accelerates and revs its engine "], "question": "which entity is a video of a vehicle?", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "an insect buzzes around continuously"], "sample_ids": ["w6RTHR6AeAg", "v25l1jef3JY"], "start_seconds": ["40", "0"], "properties": ["call, owl, screech", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a fly is buzzing around a microphone "], "question": "which entity is a predator", "label": 0}, {"captions": ["an aircraft engine runs as wind blows heavily", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["xjvTpk2Zpr8", "vfYTJq7nU"], "start_seconds": ["70", "130"], "properties": ["engine, run, wind", "rustling, ducks, quack"], "captions_pred_video": ["footage of a dhl plane landing on the runway", null], "captions_pred_audio": ["a jet engine roars and wind blows ", "a duck quacks and a woman speaks"], "question": "which entity is not a video of an aircraft engine running?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wz7N8YRy74I", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["rooster, crow, background, men", "applause, audience, yells"], "captions_pred_video": ["footage of the sun shining through the clouds on a 
cloudy day", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a helicopter engine runs continuously"], "sample_ids": ["tOSWIURC-4", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["engine, work, nearby", "engine, running, continuously"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a lawn mower is running ", "a helicopter is flying overhead "], "question": "which entity has a running engine", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a man speaks as a motor runs in the background"], "sample_ids": ["vJvryTwuAV8", "xZepNM9qcRA"], "start_seconds": ["16", "30"], "properties": ["audience, cheer, man", "background, motor, run"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zFjIWfSD-4", "vfYTJq7nU"], "start_seconds": ["410", "130"], "properties": ["People, motor, brakes", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, 
{"captions": ["water pouring and bubbling", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["uyRfq-jKPpo", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["water, bubbles, pouring", "a woman, something, fried"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "- a woman cooking in the kitchen"], "captions_pred_audio": ["water is running from a faucet", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of something being fried?", "label": 1}, {"captions": ["a door opens and closes", "water pouring and bubbling"], "sample_ids": ["vBHyYJ8pL0", "uyRfq-jKPpo"], "start_seconds": ["2", "50"], "properties": ["open, close, door", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube 
how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "water is running from a faucet"], "question": "which entity is more likely to be a door", "label": 0}, {"captions": ["people speak and laugh as a child speaks", "some men converse over an engine running"], "sample_ids": ["sa6TLVbooCc", "sCiy7QS1U"], "start_seconds": ["240", "300"], "properties": ["people, laugh, child", "men, converse, engine"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", null], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows people speaking and laughing as a child speaks?", "label": 0}, {"captions": ["birds chirp and pigeons vocalize while walking around", "vehicles pass by on a roadway"], "sample_ids": ["wIvYjuR3nrg", "tgbONvsP47Y"], "start_seconds": ["9", "0"], "properties": ["birds, pigeons, vocalize", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and cooing", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a clock ticktocks in wind", "a car accelerates and wind blows"], "sample_ids": ["yVumC9TGknc", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["ticktocks, clock, wind", "accelerates, wind, blows"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", null], "captions_pred_audio": ["a series of beeps and chirps", "a race car accelerates and revs its engine "], "question": "which entity is 
moving", "label": 1}, {"captions": ["a toilet flushes and water drains", "a clock ticktocks"], "sample_ids": ["sfAvvZwdLCY", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the toilet in the bathroom", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a toilet is flushed", "a clock is ticking loudly"], "question": "which entity is a timepiece", "label": 1}, {"captions": ["food is frying and sizzles", "paper folding and crinkling"], "sample_ids": ["zNRChLjqcU", "zPpG3RD8lSs"], "start_seconds": ["220", "20"], "properties": ["food is frying, sizzles, food", "paper, fold, crinkle"], "captions_pred_video": [null, "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["water is running from a faucet into a sink", "the wind blows and a mouse clicks "], "question": "which entity is not a food", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vr8ZXjEBhMQ", "uEU-Hg5MTN8"], "start_seconds": ["150", "27"], "properties": ["wind, blow, zoom", "a woman, laughs, 
animal"], "captions_pred_video": ["is taken from a motorcycle's point of view", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vbpKkWvfOu4", "wqZ135Ssz0"], "start_seconds": ["560", "60"], "properties": ["a, man, speaks", "two men, woman, birds"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "an insect buzzes around continuously"], "sample_ids": ["vveS8HT7Uog", "v25l1jef3JY"], "start_seconds": ["100", "0"], "properties": ["a man, objects, speak", "buzzes, continuously, insect"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a drill runs and two people laugh", "water drips and bubbles as a man speaks"], "sample_ids": ["tEE3MpBt1sg", "vSeGhaZt-aI"], "start_seconds": ["50", "50"], "properties": ["two people, laugh, drill", "water, bubbles, speak"], "captions_pred_video": ["footage is blurry due to the smoke in the 
air", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "multiple people speak and children yell while water gurgles"], "sample_ids": ["xKB8O8LTs6s", "vb1fPSDI4c"], "start_seconds": ["70", "30"], "properties": ["music, gunfire, explosion", "multiple, people, yell"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "winds blows roughly as a vehicle races past"], "sample_ids": ["sjlVMgdGSK0", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["car, revving, loudly", "wind, blows, vehicle"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a jet engine roars and wind blows "], "question": "which is not a vehicle", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "water flows as men speak and yell"], "sample_ids": ["uWPRNLnpy7Y", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["accelerate, laugh, vehicle", "water, flow, men"], "captions_pred_video": ["is taken from a car driving down the street", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which 
entity is a video of a vehicle?", "label": 0}, {"captions": ["an insect buzzes around continuously", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["v25l1jef3JY", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["buzzes, continuously, insect", "rooster, crow, background, men"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a bird?", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a man talks while metallic objects are rapped and steam is released"], "sample_ids": ["vr8ZXjEBhMQ", "vuUVPzd2FXw"], "start_seconds": ["150", "160"], "properties": ["wind, blow, zoom", "a, steam, release"], "captions_pred_video": ["is taken from a motorcycle's point of view", "of the person cooking on the grill with a spatula"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a man is speaking and dishes are clanging"], "question": "which entity is a video of a man talking?", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a man speaks, another man speaks, and a small bell dings"], "sample_ids": ["xjhAnI2q6hM", "t69a8aRKhmc"], "start_seconds": ["6", "30"], "properties": ["engine revs, vehicle, people", "a, b, c"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "footage is blurry and out of focus"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a man is speaking and birds are chirping in the background "], "question": "which entity has a vehicle?", "label": 0}, {"captions": ["a person speaks over rustling leaves", "water pouring and bubbling"], "sample_ids": ["zOZleIRqZm4", "uyRfq-jKPpo"], "start_seconds": ["80", "50"], "properties": 
["rustling, leaves, person", "water, bubbles, pouring"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "a infant makes noise and is excited"], "sample_ids": ["xfaoyyzw2WU", "wIJK3-5y0kA"], "start_seconds": ["180", "30"], "properties": ["loud, jet engine, roar", "noise, excited, infant"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a baby cries and a woman speaks"], "question": "which is louder", "label": 1}, {"captions": ["a man is filing a hard object", "a man speaks over intermittent keyboard taps"], "sample_ids": ["vveS8HT7Uog", "tw76HGONaKg"], "start_seconds": ["100", "570"], "properties": ["a man, hard, object", "audio, man, keyboard"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "game you are currently playing on microsoft xbox 360 the legend of 
zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a man speaks and types on a computer keyboard "], "question": "which entity is a video of a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a man speaks followed by another man speaking outside"], "sample_ids": ["wz7N8YRy74I", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, people", "two men, speak, follow"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a rooster in it?", "label": 0}, {"captions": ["a jet engine screams, then increases its power", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vBslzh7saPw", "uYT5gxnyMWM"], "start_seconds": ["90", "50"], "properties": ["power, scream, increase", "female, spraying, scream"], "captions_pred_video": ["a pickup truck 
carrying a large object down the road", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "water pouring and bubbling"], "sample_ids": ["vh30P49Po6s", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["loud, continuous, quacks", "water, bubbles, pouring"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a duck is quacking loudly", "water is running from a faucet"], "question": "which entity is more quiet", "label": 1}, {"captions": ["multiple ducks quack continuously", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wfHeoPDLMaM", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["multiple, quack, continuously", "loud, multiple, distance"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle 
of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "an infant crying frantically"], "sample_ids": ["vbpKkWvfOu4", "zwOBqeFTgiU"], "start_seconds": ["560", "30"], "properties": ["a, woman, man", "cry, infant, frantically"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "of the baby crying in the car seat"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["water rushes by", "a man is snoring loudly and repeatedly"], "sample_ids": ["x-PeY8Yb8M4", "sncRqQ67iJU"], "start_seconds": ["300", "460"], "properties": ["water, rushes, by", "loud, repeatedly, man"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a car is driving on a wet road ", "a person is snoring"], "question": "which entity is louder", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "loud intermittent 
buzzing with intermittent laughter"], "sample_ids": ["su6FAOcOA8c", "sLUnaPT5gM8"], "start_seconds": ["4", "0"], "properties": ["engine, idle, woman", "loud, laughter, intermittent"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a horn rings out as a machine runs by", "a car accelerates and wind blows"], "sample_ids": ["slZLHwNbbt4", "u0TrcHhkPQ"], "start_seconds": ["300", "20"], "properties": ["a, horn, run", "accelerates, wind, blows"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", null], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a race car accelerates and revs its engine "], "question": "which is not a machine", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["y8WEcpOlT3I", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["harsh, wind, blows", "men, talk, cars"], "captions_pred_video": ["on how to use a sewing machine youtube", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking with wind noise in the 
background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a helicopter engine runs continuously", "vehicles pass by on a roadway"], "sample_ids": ["ugHJF0hfYkg", "tgbONvsP47Y"], "start_seconds": ["10", "0"], "properties": ["engine, running, continuously", "pass, vehicle, roadway"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["an insect buzzes around continuously", "a horn blasts as warning bells ring"], "sample_ids": ["v25l1jef3JY", "zgUgkpk78xU"], "start_seconds": ["0", "70"], "properties": ["buzzes, continuously, insect", "horn, bells, ring"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a train blows its horn as it speeds down the tracks "], "question": "which entity is louder", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["xV7Mg1QucSc", "yDoT73BWsdA"], "start_seconds": ["14", "10"], "properties": ["alarm, ticktocks, laughs", "engine, revs, vehicle"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a race car accelerates and revs its engine "], "question": "which 
entity is a vehicle?", "label": 1}, {"captions": ["a door opens and birds chirp", "birds vocalize and chirp continuously"], "sample_ids": ["yeFvk9x0wWI", "w1mlz3Pe4fU"], "start_seconds": ["30", "300"], "properties": ["door, open, birds", "vocalize, chirp, continuously"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "of a bird in a cage"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "birds are chirping and singing"], "question": "which entity is more active", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "a jet engine spools up and takes off"], "sample_ids": ["wRV8yMk886E", "vBslzh7saPw"], "start_seconds": ["0", "90"], "properties": ["liquid, spray, nozzle", "engine, spools, takes"], "captions_pred_video": ["two cars are parked in a parking lot at night", "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a jet engine roars and accelerates "], "question": "which entity is a moving object", "label": 1}, {"captions": ["continuous snoring", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sLkeqCDJIyw", "vYkA3cfXp5Q"], "start_seconds": ["120", "30"], "properties": ["loud, snoring, noise", "engine, accelerate, idle"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a person is snoring loudly", "an engine is idling"], "question": "which entity is not a noise", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a infant makes noise and is excited"], "sample_ids": ["v0x1odnXtP0", "wIJK3-5y0kA"], "start_seconds": ["210", "30"], "properties": ["keyboard, type, computer", "noise, excited, infant"], "captions_pred_video": ["how to make money on youtube in spanish", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a person is typing on a keyboard", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "a car accelerates and wind blows"], "sample_ids": ["vJ7JPEFhyLA", "u0TrcHhkPQ"], "start_seconds": ["16", "20"], "properties": ["three men, wind, flow", "accelerates, wind, blows"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["someone whistles a song", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sIXTftIuUgw", "wqZ135Ssz0"], "start_seconds": ["90", "60"], "properties": ["someone, song, whistle", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person whistling a song", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["yZmhM1HcsyE", "tdWhHV3X25Q"], "start_seconds": ["4", "60"], "properties": ["engine, roar, water", "applause, audience, 
yells"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["vehicles pass by on a roadway", "a clock ticktocks"], "sample_ids": ["tgbONvsP47Y", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["pass, vehicle, roadway", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a fire truck entering a garage", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a car is driving on the road ", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vr8ZXjEBhMQ", "sSMl2vc3ek"], "start_seconds": ["150", "20"], "properties": ["wind, blow, zoom", "loud, multiple, distance"], "captions_pred_video": ["is taken from a motorcycle's point of view", null], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a person snoring loudly"], "question": "which entity is not a zoom", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "someone whistles a tune"], "sample_ids": ["w0xsN8X18Y", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["music, surface, rain", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaking with light rustling", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["zOZleIRqZm4", "uYT5gxnyMWM"], "start_seconds": 
["80", "50"], "properties": ["light, rustling, man", "a, scream, girl"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a baby is crying"], "question": "which entity is more spooky", "label": 1}, {"captions": ["a infant makes noise and is excited", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wIJK3-5y0kA", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["noise, excited, infant", "water, radio, man"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "dogs bark as an engine runs and a person whistles"], "sample_ids": ["sWZzXuWYY", "zY3icUyMdh8"], "start_seconds": ["420", "20"], "properties": ["male, speech, banging", "dog, bark, engine"], "captions_pred_video": [null, "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a car is driving and dogs are barking and squealing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["xyL9F5VrjkE", "ukg5L09Wpvo"], "start_seconds": ["20", "150"], "properties": ["wind, motor, distance", "clickety-clack, train, whistle"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": 
["the wind is blowing and a car is passing by ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a person is whistling a tune", "some tunes played by whistling"], "sample_ids": ["scYRUkrFLiQ", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["a, tune, whistle", "tune, play, whistling"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a person whistling a song", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "three men talk while wind blows and some liquid flows"], "sample_ids": ["ziUT9IFTkjg", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["background, birds, rustling", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a natural phenomenon", "label": 0}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "an infant crying as a woman laughs"], "sample_ids": ["yYEVLuqEytU", "xhmRY9yhC7c"], "start_seconds": ["40", "20"], "properties": ["animal, pig, background", "a, laugh, infant"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["spYNpeN7rPY", "yDoT73BWsdA"], 
"start_seconds": ["1", "10"], "properties": ["a clock, ticktock, man", "engine, revs, vehicle"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["an audience gives applause", "a man speaks uses a drill"], "sample_ids": ["x6iCUDmRpKQ", "x5eIC7S0fbg"], "start_seconds": ["38", "60"], "properties": ["applause, audience, give", "A man is speaking, uses a drill, and is a tool"], "captions_pred_video": ["a black background with the moon and stars in the sky", "a person in surgical gloves is using a needle to remove a small object from a tooth"], "captions_pred_audio": ["a group of people are clapping and cheering", "a man is speaking and using a power tool "], "question": "which entity is a tool", "label": 1}, {"captions": ["water 
runs from a faucet while some men speak and the water runs in the sink", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["vzceMbklWc", "s7knHCFW82w"], "start_seconds": ["180", "30"], "properties": ["water, faucet, sink", "blow horn, get close, train"], "captions_pred_video": [null, "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["water is running and a man is speaking", "a train is blowing its horn and its wheels are squealing "], "question": "which is a train", "label": 1}, {"captions": ["an airplane engine runs", "a person uses a saw to cut some wood"], "sample_ids": ["yVPZ2MNWpms", "sHbXC6na9hg"], "start_seconds": ["0", "0"], "properties": ["engine, airplane, runs", "a person, saw, wood"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["a car is driving by on the road ", "an engine is idling and vibrating"], "question": "which entity is a person", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "someone is typing on a computer keyboard"], "sample_ids": ["vXlk0lIQBFo", "v0x1odnXtP0"], "start_seconds": ["470", "210"], "properties": ["wind, talk, vocalize", "keyboard, type, computer"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "how to make money on youtube in spanish"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a clock ticktocks", "a infant makes noise and is excited"], "sample_ids": ["v-g-j2uTByM", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks", "noise, excited, infant"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "of a baby playing 
with a cat in a dark room"], "captions_pred_audio": ["a clock is ticking loudly", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["women speak and laugh as wind blows", "an airplane engine spools and people speak"], "sample_ids": ["un9VQlzgZM", "wTjoRj1se3U"], "start_seconds": ["5", "390"], "properties": ["wind, speak, laugh", "airplane, engine, spool"], "captions_pred_video": [null, "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a jet engine is running and people are talking"], "question": "which entity is about a moving object", "label": 1}, {"captions": ["material crumbles into a microphone", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vofpvUo6NAw", "tdWhHV3X25Q"], "start_seconds": ["220", "60"], "properties": ["material, crumbles, microphone", "applause, audience, yells"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a man is speaking and a crowd is clapping"], "question": "which is a live action", "label": 1}, {"captions": ["a train horn sounds as a railroad passing bell rings", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["zgUgkpk78xU", "sLUnaPT5gM8"], "start_seconds": ["70", "0"], "properties": ["horn, bell, train", "loud, laughter, intermittent"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "of a baby laying on his stomach in a blue shirt and diaper"], 
"captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a dog barks and whimpers", "a train horn blows as it passes by"], "sample_ids": ["sShpyu2l4YQ", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "horn, blows, train"], "captions_pred_video": ["the puppies are playing with a toy", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a dog is barking and growling", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["scraping and female speech with distant music", "a man speaks as a motor runs in the background"], "sample_ids": ["yHeVV-xeOxQ", "xZepNM9qcRA"], "start_seconds": ["130", "30"], "properties": ["female, speech, music", "background, motor, run"], "captions_pred_video": ["of a girl milking a goat's udder", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in 
the background?", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xyL9F5VrjkE", "yajyRTUQk3U"], "start_seconds": ["20", "400"], "properties": ["engine, run, wind", "a woman, something, fried"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "- a woman cooking in the kitchen"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "small dogs yip and bark sharply"], "sample_ids": ["u6jIvCtKarQ", "v-wcQf4BDY0"], "start_seconds": ["70", "120"], "properties": ["a, man, speaks", "bark, yip, sharply"], "captions_pred_video": ["footage of a person using a blender on a stove top", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "an infant crying as a woman laughs"], "sample_ids": ["yNtRmrn0io8", "xhmRY9yhC7c"], "start_seconds": ["210", "20"], "properties": ["storm, distance, strike", "a, laugh, infant"], "captions_pred_video": ["footage of a house in the middle of the night", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["rain falls and thunder roars", "a baby cries and a woman speaks"], "question": "which is not a person", "label": 0}, {"captions": ["a woman speaks and food sizzles while frying", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wTideSjRFS0", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["food, sizzle, woman", "airplane, boy, fly"], "captions_pred_video": ["footage 
of a woman cooking in a kitchen with a microwave oven", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about flying", "label": 1}, {"captions": ["a woman talking as an infant is crying", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["tMbMDvT50j8", "xKB8O8LTs6s"], "start_seconds": ["12", "70"], "properties": ["a, talk, infant", "music, gunfire, explosion"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a baby cries and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["water splashes as an animal walks through", "a duck quacks continuously"], "sample_ids": ["w1ir-sZ3Im8", "vh30P49Po6s"], "start_seconds": ["90", "30"], "properties": ["animal, water, splashes", "quacks, continuously, duck"], "captions_pred_video": ["footage of a group of people riding horses through a river", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["water splashes and gurgles as people speak", "a duck is quacking loudly"], "question": "which animal is more active", "label": 0}, {"captions": ["a rumble grows louder", "people speak as gunfire rings out"], "sample_ids": ["y4MY9mp8-TA", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["loudness, increase, rumble", "gunfire, ring, speak"], "captions_pred_video": ["a helicopter flying in the sky", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a helicopter flies overhead ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 
1}, {"captions": ["a motorcycle idles loudly as wind blows", "plastic is tapped on while someone speaks"], "sample_ids": ["v7jJS8aAyA", "wvKpEYswXO0"], "start_seconds": ["10", "150"], "properties": ["wind, blows, loudly", "plastic, tap, speak"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is quieter", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "a duck quacks continuously"], "sample_ids": ["zhx6hoYrHeI", "vh30P49Po6s"], "start_seconds": ["160", "30"], "properties": ["engine, sputter, rough", "quacks, continuously, duck"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "water is sprayed across a hard surface"], "sample_ids": ["sOa7g-44Dag", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["background, man, spray", "water, spray, surface"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "spraying followed by silence"], "question": "which entity is sprayed across a hard surface", "label": 1}, {"captions": ["a duck quacks continuously", "wind blows and women speak as livestock vocalizes"], "sample_ids": ["vh30P49Po6s", "vXlk0lIQBFo"], "start_seconds": ["30", "470"], "properties": ["quacks, continuously, duck", "wind, speak, vocalize"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", 
"- a woman and two donkeys in a fenced in area"], "captions_pred_audio": ["a duck is quacking loudly", "wind chimes are ringing and people are speaking and laughing "], "question": "which entity is speaking", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yRx9txMcBl0", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["motors, tires, screech", "a woman, something, fried"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "birds chirp as a man speaks and a younger person speaks"], "sample_ids": ["uC9dtII1KDI", "xl2PIWyXaM"], "start_seconds": ["150", "160"], "properties": ["wind, gusts, distance", "chirp, man, younger person"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", null], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "birds are chirping and people are talking"], "question": "which entity is more likely to be in a city", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "a man speaks over a radio as wind 
blows and water splashes"], "sample_ids": ["zl9Dqx-j7q4", "tDVADusiIoc"], "start_seconds": ["6", "60"], "properties": ["engine, laugh, loud", "water, radio, man"], "captions_pred_video": ["footage of a man driving a car in the dark", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a jet engine roars ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a speedboat passes quickly on the water"], "sample_ids": ["w5W5Kqtc8E", "tjmoSi330GM"], "start_seconds": ["100", "23"], "properties": ["water, splashes, motorboat", "speed, water, boat"], "captions_pred_video": [null, "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a motorboat speeds through water with wind noise "], "question": "which boat is moving faster", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "wind blows as people chatter quietly"], "sample_ids": ["yDoT73BWsdA", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["engine revs, tires squeal, vehicle", "wind, chatter, people"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage is blurry and out of focus"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["uoGVs9yUqY4", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["multiple, vocalize, wind", "sheep, baa, birds"], "captions_pred_video": ["for how to make a wooden shed door youtube", "footage of a goat in a pen behind a wooden fence"], 
"captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a goat bleats and birds chirp"], "question": "which entity is a sheep?", "label": 1}, {"captions": ["a infant makes noise and is excited", "a telephone rings followed by a woman talking"], "sample_ids": ["wIJK3-5y0kA", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["noise, excited, infant", "ring, talk, woman"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a infant makes noise and is excited", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["wIJK3-5y0kA", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["noise, excited, infant", "People, motor, brakes"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "a machine beeps continuously"], "sample_ids": ["wTideSjRFS0", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["food, sizzle, woman", "beeps, machine, continuously"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "running water in a faucet with some clinks"], "sample_ids": ["vh30P49Po6s", "zNRChLjqcU"], "start_seconds": ["30", "220"], "properties": ["loud, continuous, quacks", "water, faucet, run"], "captions_pred_video": ["of a man brushing his teeth 
with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "water is running from a faucet into a sink"], "question": "which entity is quieter", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vJvryTwuAV8", "vb1fPSDI4c"], "start_seconds": ["16", "30"], "properties": ["audience, cheer, man", "multiple, people, yell"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", null], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking?", "label": 1}, {"captions": ["people speak in a closed space", "a car accelerates and wind blows"], "sample_ids": ["sTpirNYo8vQ", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["people, space, speak", "accelerates, wind, blows"], "captions_pred_video": ["of a man taking a selfie on a bus", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a person sniffles and sneezes", "several insects fly while two men talk"], "sample_ids": ["uRlbY6aoBU", "s-T9OVOiMLo"], "start_seconds": ["0", "330"], "properties": ["sneezes, sniffles, person", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a person?", "label": 0}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a man speaks as a motor runs in the background"], "sample_ids": ["vdoxuJn9lTc", "xZepNM9qcRA"], "start_seconds": ["40", "30"], 
"properties": ["burp, loud, girl", "background, motor, run"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a child speaks followed by a burp", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "small dogs yip and bark sharply"], "sample_ids": ["ugHJF0hfYkg", "v-wcQf4BDY0"], "start_seconds": ["10", "120"], "properties": ["engine, running, continuously", "bark, yip, sharply"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a helicopter is flying overhead ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "a duck quacks continuously"], "sample_ids": ["wwyfGO2J4", "vh30P49Po6s"], "start_seconds": ["90", "30"], "properties": ["people, applaud, hoot", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["continuous snoring", "a woman speaks as she rubs two objects together"], "sample_ids": ["sLkeqCDJIyw", "vzxHnu-SFEw"], "start_seconds": ["120", "80"], "properties": ["loud, snoring, noise", "two objects, woman, speak"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is silent", "label": 1}, {"captions": ["someone is burping continuously", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["y636gklDioE", "uZesmtKZGSw"], "start_seconds": ["20", "250"], "properties": ["burps, burps, burps", "men, talk, cars"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a person burps 
loudly several times", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["food fries in a pan as someone talks and cooks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["ukxt9I7eMMg", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["food, pan, cook", "rooster, crow, background, men"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is about a rooster?", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "water splashes as an animal walks through"], "sample_ids": ["ylpYOorfH4o", "w1ir-sZ3Im8"], "start_seconds": ["410", "90"], "properties": ["engine, run, loud", "animal, water, splashes"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and an engine is revving", "water splashes and gurgles as people speak"], "question": "which entity is more quiet", "label": 1}, {"captions": ["multiple beeps are followed by a 
squawk and a child speaking", "water splashes as an animal walks through"], "sample_ids": ["w34HjHr6gAY", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["beeps, squawk, child speaking", "animal, water, splashes"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["someone whistles briefly", "an insect buzzes around continuously"], "sample_ids": ["uFoga8sHpiw", "v25l1jef3JY"], "start_seconds": ["90", "0"], "properties": ["sound, duration, pitch", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a bird in a cage", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a person whistles a song", "a fly is buzzing around a microphone "], "question": "which entity buzzes continuously", "label": 1}, {"captions": ["a infant makes noise and is excited", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wIJK3-5y0kA", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["noise, excited, infant", "three men, wind, flow"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while the wind is 
blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a stream of water runs briefly"], "sample_ids": ["w6RTHR6AeAg", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["call, owl, screech", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a car is driving on a wet road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "a horn rings out as a machine runs by"], "sample_ids": ["sZPuqDgX2V0", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["engine, accelerate, intercom", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["sOa7g-44Dag", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["audio, scratching, man", "water, radio, man"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a recording of a man speaking?", "label": 0}, {"captions": ["motors runs briefly and tires screech", "water flows as men speak and yell"], "sample_ids": ["yRx9txMcBl0", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["motors, tires, 
screech", "water, flow, men"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a car accelerates and wind blows"], "sample_ids": ["sfAvvZwdLCY", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["water drains, flushes, water", "accelerates, wind, blows"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["someone snores nearby", "a woman speaks as she rubs two objects together"], "sample_ids": ["spJCm8tD9Zo", "vzxHnu-SFEw"], "start_seconds": ["90", "80"], "properties": ["someone snores, nearby, someone", "two objects, woman, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a man talks while vehicles pass by", "people speaking indiscriminately in the distance with a person snoring loudly nearby"], "sample_ids": ["sK4u5T8hW78", "w2JXXIAdUdg"], "start_seconds": ["30", "10"], "properties": ["a, man, talk", "snoring, distance, person"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a close up shot of a person's mouth with a toothbrush in it"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a person snoring and a dog whimpering"], "question": "which entity is more likely to be a person", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "repeated tapping is accompanied by water running and a woman speaking softly"], "sample_ids": ["vfYTJq7nU", 
"wvKpEYswXO0"], "start_seconds": ["130", "150"], "properties": ["rustling, ducks, quack", "sound, water, running"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is accompanied by water running", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tOSWIURC-4", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["engine, work, nearby", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a lawn mower is running ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wPz6QRAkEb4", "xfaoyyzw2WU"], "start_seconds": ["60", "180"], "properties": ["chirps, tweets, song", "loud, jet engine, roar"], "captions_pred_video": ["a bird in a cage on top of a pole", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["birds are chirping in the background ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "some tunes played by whistling"], "sample_ids": ["sOa7g-44Dag", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["audio, scratching, man", "tune, play, whistling"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a person whistling a song"], "question": "which entity is playing 
tunes", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "a clock ticktocks"], "sample_ids": ["xNMovAf3o50", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["rain, thunder, music", "ticktocks, clock, ticktocks"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["wind blows strongly", "multiple people speak and children yell while water gurgles"], "sample_ids": ["w8uLijTqtlU", "vb1fPSDI4c"], "start_seconds": ["70", "30"], "properties": ["wind, blows, strongly", "multiple, people, yell"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "paper is crumpling consistently"], "sample_ids": ["v7jJS8aAyA", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["wind, blows, loudly", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["uZesmtKZGSw", "tdWhHV3X25Q"], "start_seconds": ["250", "60"], "properties": ["car, track, man", "applause, audience, yells"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["sZvwOuuPGP0", "wz7N8YRy74I"], "start_seconds": ["50", "30"], "properties": ["engine, diesel, truck", "rooster, crow, background, men"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a medium engine is running ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "several insects fly while two men talk"], "sample_ids": ["xl2PIWyXaM", "s-T9OVOiMLo"], "start_seconds": ["160", "330"], "properties": ["chirp, man, younger person", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["birds are chirping and people are talking", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a man speaking and a younger person speaking?", "label": 0}, {"captions": ["a train engine runs and a horn blows", "motor noise is followed by a horn honking and a 
siren wailing"], "sample_ids": ["zPX9o1uDiI", "y2bVZ7rz-5M"], "start_seconds": ["40", "280"], "properties": ["engine, horn, run", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["children speak as a female ask them questions", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wEBlkGWVWwE", "zj2R0XoFr5k"], "start_seconds": ["260", "50"], "properties": ["female, speak, questions", "airplane, boy, fly"], "captions_pred_video": ["shows a person writing on the whiteboard", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about a boy speaking?", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "crowd applause while a guy laughs followed by another man speaking"], "sample_ids": ["vbZ-0lGPneg", "tDlfY3nmx1A"], "start_seconds": ["30", "160"], "properties": ["a woman, a television program, a bird", "applause, laugh, man"], "captions_pred_video": ["of a man holding a baby duck in his hands", "a man in a suit and tie is talking to another man in a suit and tie"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a crowd is clapping and laughing and a man is speaking "], "question": "which entity has more people", "label": 1}, {"captions": ["a clock ticktocks briefly", "a heavy rain falls endlessly"], "sample_ids": ["u7C-AEBQM", "wP8ZKrlx3oA"], "start_seconds": ["30", "40"], "properties": ["ticktocks, clock, ticktocks briefly", "heavy, rain, 
fall"], "captions_pred_video": [null, "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a ticktock of a clock", "a heavy rain is falling on a surface"], "question": "which entity is falling", "label": 1}, {"captions": ["a clock ticktocks in wind", "a group of people chatter and talk as multiple horns honk in the background"], "sample_ids": ["yVumC9TGknc", "yLy-WycbVVE"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, wind", "background, people, talk"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "a soccer field in a stadium with yellow and red seats"], "captions_pred_audio": ["a series of beeps and chirps", "a man is speaking and a church bell is ringing with wind noise in the background "], "question": "which entity is talking", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a clock ticktocks"], "sample_ids": ["xSKJGCItUWE", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["engine, run, boy", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the helicopter flying in the room", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vXlk0lIQBFo", "uEU-Hg5MTN8"], "start_seconds": ["470", "27"], "properties": ["wind, talk, vocalize", "a woman, laughs, animal"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a 
video", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vbr9mHKc8WM", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["noise, loudness, engine", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["an engine is idling", "a train blows its whistle and blows its horn "], "question": "which train is making noise", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["smGI3C1NZc", "t25U-v4k4ts"], "start_seconds": ["30", "40"], "properties": ["water, drain, toilet", "a, chirps, bird"], "captions_pred_video": [null, "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and bees are buzzing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a weapon fires multiple times", "a woman speaks happily and an animal chirps"], "sample_ids": ["sMC07Ucy7kg", "uWAAAL4CIoc"], "start_seconds": ["10", "0"], "properties": ["weapon, fire, multiple", "a woman, chirps, animal"], "captions_pred_video": ["footage is from a car's point of view", null], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a woman is speaking and a dog is barking "], "question": "which entity is more passive", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a man speaks as a car is passing by"], "sample_ids": ["vddP56-ogds", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["water, flow, laugh", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a machine runs continuously", "an insect buzzes around continuously"], "sample_ids": ["wdXV3Pv0jiY", "v25l1jef3JY"], "start_seconds": ["11", "0"], "properties": ["machine, running, continuously", "buzzes, continuously, insect"], "captions_pred_video": ["footage is blurry and shaky", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "a woman speaks followed by another woman whimpering and speaking"], "sample_ids": ["sOa7g-44Dag", "xOZfdgAgJ9o"], "start_seconds": ["30", "40"], "properties": ["background, man, spray", "woman, whimpering, speaking"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage of a woman talking to a man in a doctor's office"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a woman is speaking and a baby is crying"], "question": "which entity is a woman speaking?", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wRBHTgrbiwg", "tdWhHV3X25Q"], "start_seconds": ["50", "60"], "properties": ["bird, owl, speak", "applause, audience, yells"], "captions_pred_video": ["of a bee pollinating the 
flowers in the field", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "a man speaks as a car is passing by"], "sample_ids": ["v5P-ThUCINM", "sK4u5T8hW78"], "start_seconds": ["400", "30"], "properties": ["background, chirp, bird", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["wz7N8YRy74I", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["rooster, crow, background, men", "men, talk, cars"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a cat meows and children speak", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["x5cuQjOdM3E", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["cat, speak, children", "female, spraying, scream"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "small dogs yip and bark sharply"], "sample_ids": ["uKCSGgof8gI", "v-wcQf4BDY0"], "start_seconds": ["12", "120"], "properties": ["chirps, distance, signal", "bark, yip, sharply"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["multiple motorcycles pass by as a man speaks", "a flush is followed by gurgling water, then another flush"], "sample_ids": ["zcDwZ6W7E3E", "tqR406bGiE"], "start_seconds": ["180", "40"], "properties": ["man, speak, motorcycles", "flush, water, gurgle"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a toilet is flushed"], "question": "which entity is about water?", "label": 1}, 
{"captions": ["a dark barks and whimpers", "winds blows roughly as a vehicle races past"], "sample_ids": ["sYj4hpDUZDQ", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["barks, whimpers, dark", "wind, blows, vehicle"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a dog barks and a cat meows", "a jet engine roars and wind blows "], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "a person snores loudly multiple times at a close distance"], "sample_ids": ["soTOh3zYJfY", "sSMl2vc3ek"], "start_seconds": ["40", "20"], "properties": ["vehicle, skid, tires", "loud, multiple, distance"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["people speak softly as food sizzles", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["yhQ2Lg-7qDY", "w5W5Kqtc8E"], "start_seconds": ["130", "100"], "properties": ["food, sizzle, speak", "wind, blow, vehicle"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running?", "label": 1}, {"captions": ["a man speaks as a machine runs", "a man speaks followed by another man speaking outside"], "sample_ids": ["vD6lYD1l0BY", "viuTg1M-dqg"], "start_seconds": ["330", "30"], "properties": ["a, machine, run", "two men, speak, follow"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of water coming out of a hole in the ground"], 
"captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a person speaks over rustling leaves", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["zOZleIRqZm4", "sLUnaPT5gM8"], "start_seconds": ["80", "0"], "properties": ["rustling, leaves, person", "loud, laughter, intermittent"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a large bell chimes back and forth loudly", "a stream of water flows as people talk and wind blows"], "sample_ids": ["w2M4i1mklOA", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["loud, chime, bell", "stream, water, flow"], "captions_pred_video": ["footage of an antique clock", "footage is blurry and out of focus"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a mechanical buzzing getting louder", "roadway noise occurs and a truck accelerates"], "sample_ids": ["sEprKHm8Sj8", "tgbONvsP47Y"], "start_seconds": ["90", "0"], "properties": ["noise, loud, buzzing", "noise, truck, accelerate"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a car is 
driving on the road "], "question": "which noise is caused by a truck", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xC8kbrKJmco", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["background, goat, scream", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a goat is bleating ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["wz7N8YRy74I", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, men", "beeps, hit, woman"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "an airplane engine runs"], "sample_ids": ["yRx9txMcBl0", "yVPZ2MNWpms"], "start_seconds": ["40", "0"], "properties": ["accelerates, tires, squeals", "engine, airplane, runs"], "captions_pred_video": ["in 10 
words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a car is driving by on the road "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["paper is crumpling consistently", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["v5cSxLaHADY", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "airplane, boy, fly"], "captions_pred_video": ["footage of the person holding a pair of scissors", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["paper is crumpled and crinkled", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a woman speaks and taps on a hard surface before running tap water"], "sample_ids": ["xjvTpk2Zpr8", "wvKpEYswXO0"], "start_seconds": ["70", "150"], "properties": ["engine, run, wind", "water, tap, run"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is not running", "label": 1}, 
{"captions": ["a person uses a saw to cut some wood", "a stream of water runs briefly"], "sample_ids": ["sHbXC6na9hg", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["a person, saw, wood", "stream, water, run"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["an engine is idling and vibrating", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["white noise and birds chirping", "loud, continuous burping"], "sample_ids": ["wRBHTgrbiwg", "y636gklDioE"], "start_seconds": ["50", "20"], "properties": ["noise, white, chirping", "loud, continuous, burping"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "a dog sitting on a red chair in front of an old telephone"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a person burps loudly several times"], "question": "which noise is louder", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "winds blows roughly as a vehicle races past"], "sample_ids": ["wRBHTgrbiwg", "xjvTpk2Zpr8"], "start_seconds": ["50", "70"], "properties": ["bird, owl, speak", "wind, blows, vehicle"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a jet engine roars and wind blows "], "question": "which entity is more active", "label": 1}, {"captions": ["people speak softly as food sizzles", "a woman speaks as she rubs two objects together"], "sample_ids": ["yhQ2Lg-7qDY", "vzxHnu-SFEw"], "start_seconds": ["130", "80"], "properties": ["food, sizzle, speak", "two objects, woman, speak"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "how to make a paper cup with scissors youtube how to make a paper cup with 
scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is about a woman speaking?", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "some men converse over an engine running"], "sample_ids": ["vXlk0lIQBFo", "sCiy7QS1U"], "start_seconds": ["470", "300"], "properties": ["wind, speak, vocalize", "men, converse, engine"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", null], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a conversation?", "label": 1}, {"captions": ["birds chirp as a train approaches", "a frog croaks as other frogs croak in the background"], "sample_ids": ["xM4joTqDVp4", "yswmmRZFItk"], "start_seconds": ["160", "0"], "properties": ["bird, chirp, train", "background, frog, croak"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "a close up of a frog in the water"], 
"captions_pred_audio": ["birds are chirping and a train is moving ", "a frog is croaking"], "question": "which entity is a solitary animal", "label": 1}, {"captions": ["people speak and tapping occurs", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["tFCUUGdREgA", "t25U-v4k4ts"], "start_seconds": ["70", "40"], "properties": ["people, tap, speak", "a, chirps, bird"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a man is speaking and bees are buzzing"], "question": "which entity has a bird chirp?", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "pigeons vocalize and birds chirp"], "sample_ids": ["xERFUeZONz8", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["ring, approach, traffic", "vocalize, bird, chirp"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "of the pigeon in the cage"], "captions_pred_audio": ["an emergency vehicle siren blares", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["uOpoD0gGXcs", "t25U-v4k4ts"], "start_seconds": ["120", "40"], "properties": ["chirps, woman, bird", "a, chirps, bird"], "captions_pred_video": ["a herd of cows grazing in the field", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a man is speaking and bees are buzzing"], "question": "which entity is a response to a woman chirping for the birds?", "label": 0}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "wind blowing followed by a zoom"], "sample_ids": ["wP8ZKrlx3oA", "vr8ZXjEBhMQ"], 
"start_seconds": ["40", "150"], "properties": ["rain, storm, thunder", "wind, blow, zoom"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a heavy rain is falling on a surface", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a weather event", "label": 0}, {"captions": ["birds chirp and an insect buzzes around", "waves crash against a shoreline and people speak"], "sample_ids": ["t97k0cejSQE", "yFB25fqfU8I"], "start_seconds": ["250", "300"], "properties": ["bird, chirp, insect", "wave, crash, shoreline"], "captions_pred_video": ["a bee on a purple thistle flower", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp and objects are moved around", "small dogs yip and bark sharply"], "sample_ids": ["yPUYU6t3rwo", "v-wcQf4BDY0"], "start_seconds": ["370", "120"], "properties": ["birds chirp, objects are moved around, birds", "bark, yip, sharply"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["insects buzz and a man speaks", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "plastic is tapped on while someone speaks"], "sample_ids": ["zcDwZ6W7E3E", "wvKpEYswXO0"], "start_seconds": ["180", "150"], "properties": ["man, speak, motorcycles", "plastic, tap, speak"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is 
speaking while a car accelerates and revs its engine ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["a man speaks in the background while a slow tick repeats", "a stream of water runs briefly"], "sample_ids": ["vZAw4apG0Es", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["background, tick, repeat", "stream, water, run"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a clock is ticking and people are talking", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "a woman speaks happily and an animal chirps"], "sample_ids": ["wRV8yMk886E", "uWAAAL4CIoc"], "start_seconds": ["0", "0"], "properties": ["liquid, spray, nozzle", "a woman, chirps, animal"], "captions_pred_video": ["two cars are parked in a parking lot at night", null], "captions_pred_audio": ["a man speaks followed by a loud burst", "a woman is speaking and a dog is barking "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a mechanical buzzing getting louder"], "sample_ids": ["siJFXfGWgDk", "sEprKHm8Sj8"], "start_seconds": ["50", "90"], "properties": ["a, bird, vehicle", "noise, loud, buzzing"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a race car accelerates and revs its engine "], 
"question": "which entity is a noise", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "a machine beeps continuously"], "sample_ids": ["vBslzh7saPw", "y682ml90jGw"], "start_seconds": ["90", "11"], "properties": ["power, scream, increase", "beeps, machine, continuously"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["zVacuqSb4LI", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["blares, fades, train", "motor noise, horn, siren"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a warning", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": 
["sapQIQUhFc", "uEU-Hg5MTN8"], "start_seconds": ["280", "27"], "properties": ["water, trickles, flow", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "wind blows strongly and a young man speaks"], "sample_ids": ["uWPRNLnpy7Y", "vs65y4qmyBE"], "start_seconds": ["10", "340"], "properties": ["accelerate, laugh, vehicle", "wind, blows, strongly"], "captions_pred_video": ["is taken from a car driving down the street", "a car is engulfed in flames on the side of the road"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a heavy engine is running and men are speaking "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a motorcycle engine is idling", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vZAqdHZ81yA", "xBxDz0CFVn0"], "start_seconds": ["180", "30"], "properties": ["engine, motorcycle, idling", "stream, water, flow"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage is blurry and out of focus"], "captions_pred_audio": ["an engine is idling loudly", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "an infant crying as a woman laughs"], "sample_ids": ["yYEVLuqEytU", "xhmRY9yhC7c"], "start_seconds": ["40", "20"], "properties": ["grunt, slurp, background", "a, laugh, infant"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a baby cries 
and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a motorcycle engine is idling"], "sample_ids": ["xV7Mg1QucSc", "vZAqdHZ81yA"], "start_seconds": ["14", "180"], "properties": ["alarm, ticktocks, laughs", "engine, motorcycle, idling"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "a motorcycle is parked on the side of the road with its rear end facing the viewer"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "an engine is idling loudly"], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "a stream of water flows as people talk and wind blows"], "sample_ids": ["y4tPJXBKDig", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["a, noise, talk", "stream, water, flow"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a man is speaking with wind noise in the background "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "a boat travels through the waves as the wind blows loudly and a man speaks over a radio"], "sample_ids": ["vbpKkWvfOu4", "tDVADusiIoc"], "start_seconds": ["560", "60"], "properties": ["a, woman, man", "wind, radio, waves"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], 
"question": "which entity has a man speaking over a radio?", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "a siren comes to life as a horn blares"], "sample_ids": ["wSVhSdj0F0", "u--KhUW8l1Y"], "start_seconds": ["10", "0"], "properties": ["horn honks, keys jingle, electronic beep", "horn, siren, life"], "captions_pred_video": [null, "a firefighter spraying water from a fire hydrant at night"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a fire truck siren blares and a horn blows "], "question": "which entity is a siren?", "label": 1}, {"captions": ["a woman talking as an infant is crying", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["tMbMDvT50j8", "s7knHCFW82w"], "start_seconds": ["12", "30"], "properties": ["a, talk, infant", "blow horn, get close, train"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["a baby cries and a woman speaks", "a train is blowing its horn and its wheels are squealing "], "question": "which entity is moving", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a vehicle accelerates squealing tires"], "sample_ids": ["w5W5Kqtc8E", "sd7xVssqlw"], "start_seconds": ["100", "50"], "properties": ["water, splashes, motorboat", "accelerates, tires, squealing"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "a man speaks as a motor runs in the background"], "sample_ids": ["yI-KvObbDoY", "xZepNM9qcRA"], "start_seconds": ["260", "30"], 
"properties": ["sound, smack, wind", "background, motor, run"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["someone snores nearby", "small dogs yip and bark sharply"], "sample_ids": ["spJCm8tD9Zo", "v-wcQf4BDY0"], "start_seconds": ["90", "120"], "properties": ["someone snores, nearby, someone", "bark, yip, sharply"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a person is snoring loudly", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a propeller rotates loudly and intensely", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["ugHJF0hfYkg", "zj2R0XoFr5k"], "start_seconds": ["10", "50"], "properties": ["loud, intense, propeller", "airplane, boy, fly"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman speaks while a helicopter flies overhead "], "question": "which is a moving object", "label": 1}, {"captions": ["a man is filing a hard object", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["vveS8HT7Uog", "xV7Mg1QucSc"], "start_seconds": ["100", "14"], "properties": ["a man, hard, object", "alarm, ticktocks, laughs"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man 
is filing and speaking with background noise and breathing ", "an alarm clock ticks and a woman laughs"], "question": "which entity is about a clock ticktocking and a man laughing?", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "propeller rearing loudly with some male and female voices interspersed in the background"], "sample_ids": ["zY3icUyMdh8", "vqZuVbG6-HI"], "start_seconds": ["20", "130"], "properties": ["dog, bark, engine", "background, male, female"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a lawn mower is running and men are speaking "], "question": "which entity has a male and female voice in the background?", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "three men talk while wind blows and some liquid flows"], "sample_ids": ["u5RmF3c3Aw", "vJ7JPEFhyLA"], "start_seconds": ["60", "16"], "properties": ["engine, car, zoom", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a car zooming by?", "label": 0}, {"captions": ["a vehicle engine runs while a siren and horn sound", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["u--KhUW8l1Y", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["engine, sound, horn", "male, duck, laugh"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", null], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", 
"label": 1}, {"captions": ["water flows followed by women screaming", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["w5W5Kqtc8E", "uEU-Hg5MTN8"], "start_seconds": ["100", "27"], "properties": ["water, flow, women", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a woman laughing?", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a beep repeats multiple times"], "sample_ids": ["xKB8O8LTs6s", "y682ml90jGw"], "start_seconds": ["70", "11"], "properties": ["music, gunshots, explosion", "beep, repeat, multiple"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a beeping sound is being made "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "wind blowing followed by a zoom"], "sample_ids": ["ylpYOorfH4o", "vr8ZXjEBhMQ"], "start_seconds": ["410", "150"], "properties": ["motor, run, steady", "wind, blow, zoom"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "is taken 
from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and an engine is revving", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "an engine runs loudly"], "sample_ids": ["vqZuVbG6-HI", "vqZuVbG6-HI"], "start_seconds": ["130", "130"], "properties": ["background, male, female", "loud, engine, run"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a child speaks in closed space"], "sample_ids": ["w8uLijTqtlU", "yW6FWLSLkx4"], "start_seconds": ["70", "40"], "properties": ["wind, microphone, noise", "child, space, speak"], "captions_pred_video": ["footage is blurry and shaky", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["the wind is blowing strongly", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sWZzXuWYY", "vb1fPSDI4c"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a person is whistling a tune", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["scYRUkrFLiQ", "tiDFTC-5vU"], 
"start_seconds": ["30", "30"], "properties": ["a, tune, whistle", "male, duck, laugh"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", null], "captions_pred_audio": ["a person whistling a song", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a clicking followed by some people laughing and a kid speaking"], "sample_ids": ["wvKpEYswXO0", "vz8868znkVQ"], "start_seconds": ["150", "60"], "properties": ["sound, water, running", "audio, click, kid speaking"], "captions_pred_video": ["of the person preparing food in the kitchen", "a video of a plane flying over a cloudy sky"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a baby is laughing and breathing with background noise "], "question": "which entity has a woman speaking softly?", "label": 0}, {"captions": ["people cheer as a vehicle engine revs", "a person snores loudly multiple times at a close distance"], "sample_ids": ["xjhAnI2q6hM", "sSMl2vc3ek"], "start_seconds": ["6", "20"], "properties": ["engine revs, vehicle, people", "loud, multiple, distance"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", null], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a person snoring loudly"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["xl2PIWyXaM", "sLUnaPT5gM8"], "start_seconds": ["160", "0"], "properties": ["chirp, man, younger person", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["birds are chirping and people are talking", "a 
baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["young female child snoring and breathing deeply", "food is frying while a woman speaks"], "sample_ids": ["sAam2NqGhLY", "yhQ2Lg-7qDY"], "start_seconds": ["20", "130"], "properties": ["snoring, breathing, child", "food, woman, speak"], "captions_pred_video": ["of a little girl sleeping on a couch", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a person is snoring", "a faucet is running and a man is speaking"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["wz7N8YRy74I", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["rooster, crow, background, people", "People, motor, brakes"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", null], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["wDVMhEdTiVw", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["gun, shoot, water", "background, birds, rustling"], "captions_pred_video": ["a blurry image of trees and water in the forest", null], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a heavy rain falls endlessly", "water rushes and then a vehicle zooms past"], "sample_ids": ["wP8ZKrlx3oA", "s4Uz1Ffgo04"], "start_seconds": ["40", 
"100"], "properties": ["heavy, rain, fall", "water, rushes, vehicle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is moving faster", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a toilet flushes and water drains"], "sample_ids": ["vlJS7LN2XyM", "sfAvvZwdLCY"], "start_seconds": ["30", "20"], "properties": ["background, clocks, ticking", "water drains, flushes, water"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a ticktock of a clock", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["water flows and trickles", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["tB7hWb9gTuQ", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["water, flow, trickle", "engine, revs, vehicle"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["water is splashing and gurgling", "a race car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "a woman speaks happily and an animal chirps"], "sample_ids": ["wP8ZKrlx3oA", "uWAAAL4CIoc"], "start_seconds": ["40", "0"], "properties": ["rain, storm, thunder", "a woman, chirps, animal"], "captions_pred_video": ["footage of a flooded street in the 
middle of a desert with mountains in the background", null], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["yks4cLgIDMc", "vfYTJq7nU"], "start_seconds": ["170", "130"], "properties": ["background, speaking, child", "rustling, ducks, quack"], "captions_pred_video": ["footage of two kids wrestling on the floor", null], "captions_pred_audio": ["a man is speaking and a child is crying", "a duck quacks and a woman speaks"], "question": "which entity has a child shouting in the background?", "label": 0}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "a infant makes noise and is excited"], "sample_ids": ["xNMovAf3o50", "wIJK3-5y0kA"], "start_seconds": ["0", "30"], "properties": ["rain, thunder, music", "noise, excited, infant"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person is burping while a girl speaks", "a duck quacks loudly and continuously"], "sample_ids": ["vdoxuJn9lTc", "vh30P49Po6s"], "start_seconds": ["40", "30"], "properties": ["person, burp, girl", "loud, continuous, quacks"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a child speaks followed by a burp", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["rain falls onto a hard surface and 
thunder roars before music plays", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["xNMovAf3o50", "vlS6YMeWAPo"], "start_seconds": ["0", "40"], "properties": ["rain, thunder, music", "sheep, baa, birds"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a goat bleats and birds chirp"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a car speeding up in the distance"], "sample_ids": ["ul60S8TXDA8", "u0TrcHhkPQ"], "start_seconds": ["60", "20"], "properties": ["sound, distance, bell", "distance, car, speed"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", null], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a girl speaks followed by a scream and more girls talking", "dishes cling together then a man begins to speak"], "sample_ids": ["uYT5gxnyMWM", "sQGXqGcwOTc"], "start_seconds": ["50", "3"], "properties": ["a, scream, girl", "cling, speak, dishes"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "mechanisms are operating and water is splashing "], "question": "which entity is about a girl speaking followed by a scream and more girls talking?", "label": 0}, {"captions": ["a girl speaks followed by a scream and more girls talking", "an infant crying as a woman laughs"], "sample_ids": ["uYT5gxnyMWM", "xhmRY9yhC7c"], "start_seconds": ["50", "20"], "properties": ["a, scream, girl", "a, laugh, infant"], "captions_pred_video": ["footage of a person spraying paint 
on the ceiling", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "some men converse over an engine running"], "sample_ids": ["wTideSjRFS0", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["food, sizzle, woman", "men, converse, engine"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a woman speaking as frying food sizzles?", "label": 0}, {"captions": ["a baby cries and a woman speaks", "an infant crying frantically"], "sample_ids": ["tMbMDvT50j8", "zwOBqeFTgiU"], "start_seconds": ["12", "30"], "properties": ["a, cry, woman", "cry, infant, frantically"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "of the baby crying in the car seat"], "captions_pred_audio": ["a baby cries and a woman speaks", "a baby cries loudly"], "question": "which entity is crying frantically", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a car speeding up in the distance"], "sample_ids": ["vZAw4apG0Es", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["background, clock, ticktocks", "distance, car, speed"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "people cheer as a vehicle engine revs"], "sample_ids": ["rqu8iB22IY", "xjhAnI2q6hM"], "start_seconds": ["5", 
"6"], "properties": ["sound, repeats, laugh", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a truck is revving its engine and a man is speaking "], "question": "which entity has more people", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["yswmmRZFItk", "uZesmtKZGSw"], "start_seconds": ["0", "250"], "properties": ["background, frog, croak", "men, talk, cars"], "captions_pred_video": ["a close up of a frog in the water", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a frog is croaking", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a machine beeps continuously", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["y682ml90jGw", "tdWhHV3X25Q"], "start_seconds": ["11", "60"], "properties": ["beeps, machine, continuously", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a beeping sound is being made ", "a man is speaking and a crowd is clapping"], "question": "which entity is a response to a performance", "label": 1}, {"captions": ["a heavy 
rain falls endlessly", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wP8ZKrlx3oA", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["heavy, rain, fall", "a woman, laughs, animal"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "small dogs yip and bark sharply"], "sample_ids": ["ukxt9I7eMMg", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["food, pan, cook", "bark, yip, sharply"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaking with light rustling", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["zOZleIRqZm4", "xfaoyyzw2WU"], "start_seconds": ["80", "180"], "properties": ["light, rustling, man", "loud, jet engine, roar"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["zgUgkpk78xU", "uZesmtKZGSw"], "start_seconds": ["70", "250"], "properties": ["horn, bells, ring", "men, talk, cars"], "captions_pred_video": ["of a train 
passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a baby laugh at a sputter", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sLUnaPT5gM8", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["laugh, sputter, baby", "engine, idle, woman"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a woman is speaking and a subway train is moving "], "question": "which entity is a person", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a stream of water runs briefly"], "sample_ids": ["ugHJF0hfYkg", "x-PeY8Yb8M4"], "start_seconds": ["10", "300"], "properties": ["engine, running, continuously", "stream, water, run"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man sitting on a rock in the 
middle of a river"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car is driving on a wet road "], "question": "which entity is running continuously", "label": 0}, {"captions": ["a person whistles and clicks a mouse", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zCrAfDfv6-A", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["person, mouse, click", "a woman, something, fried"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person whistles a song", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["tapping occurs then a baby cries", "music plays and a woman speaks on a radio before gunshots are fired"], "sample_ids": ["wIJK3-5y0kA", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["a, cry, baby", "music, radio, gunshots"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a baby cries and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is about a baby crying?", "label": 0}, {"captions": ["birds chirp as a train approaches", "three men talk while wind blows and some liquid flows"], "sample_ids": ["xM4joTqDVp4", "vJ7JPEFhyLA"], "start_seconds": ["160", "16"], "properties": ["bird, chirp, train", "three men, wind, flow"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a train?", "label": 0}, {"captions": ["a 
whistling owl calls out repeatedly and insects screech", "a man speaks as a motor runs in the background"], "sample_ids": ["w6RTHR6AeAg", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["call, owl, screech", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "birds chirp and objects are moved around"], "sample_ids": ["xZepNM9qcRA", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["background, motor, run", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "paper folding and crinkling"], "sample_ids": ["yYEVLuqEytU", "zPpG3RD8lSs"], "start_seconds": ["40", "20"], "properties": ["grunt, slurp, background", "paper, fold, crinkle"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of 
paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["several sheep bleat and a man speaks", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["water gurgles, metal squeaks and the water stops", "water flows as men speak and yell"], "sample_ids": ["x4a9YGIw4ok", "vJ7JPEFhyLA"], "start_seconds": ["120", "16"], "properties": ["water, gurgles, stops", "water, flow, men"], "captions_pred_video": ["footage is blurry and out of focus", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a toilet flushes and water splashes", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more water", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["tDVADusiIoc", "zFjIWfSD-4"], "start_seconds": ["60", "410"], "properties": ["man, radio, blows", "People, motor, brakes"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a man speaking on a radio as wind blows?", "label": 0}, {"captions": ["a car accelerates and wind blows", "people speak as gunfire rings out"], "sample_ids": ["u0TrcHhkPQ", "wqTCwqVRDlk"], "start_seconds": ["20", "80"], "properties": ["accelerates, wind, blows", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a race car accelerates and revs its 
engine ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wnpJndXuxLc", "su6FAOcOA8c"], "start_seconds": ["50", "4"], "properties": ["beeps, loud, whistle", "engine, idle, woman"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "plastic is tapped on while someone speaks"], "sample_ids": ["wTjoRj1se3U", "wvKpEYswXO0"], "start_seconds": ["390", "150"], "properties": ["engine, run, people", "plastic, tap, speak"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a jet engine is running and people are talking", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "a vehicle accelerates squealing tires"], "sample_ids": ["zkKdxzNC97Y", "sd7xVssqlw"], "start_seconds": ["27", "50"], "properties": ["hard, surface, door", "accelerates, tires, squealing"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a person sniffles and then sneezes in the distance", "a man speaks as a car is passing by"], 
"sample_ids": ["uRlbY6aoBU", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["a, distance, sneeze", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "water is sprayed across a hard surface"], "sample_ids": ["w0xsN8X18Y", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["music, surface, rain", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "spraying followed by silence"], "question": "which entity is sprayed across a hard surface?", "label": 1}, {"captions": ["a helicopter engine runs", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["t5ZbXbniOWk", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["engine, helicopter, run", "a woman, a television program, a bird"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a bird?", "label": 1}, {"captions": ["water gurgles, metal squeaks and the water stops", "pigeons vocalize 
and birds chirp"], "sample_ids": ["x4a9YGIw4ok", "uiS58TNyUiw"], "start_seconds": ["120", "430"], "properties": ["water, gurgles, stops", "vocalize, bird, chirp"], "captions_pred_video": ["footage is blurry and out of focus", "of the pigeon in the cage"], "captions_pred_audio": ["a toilet flushes and water splashes", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["food is frying while a woman speaks", "wind blowing followed by a zoom"], "sample_ids": ["yhQ2Lg-7qDY", "vr8ZXjEBhMQ"], "start_seconds": ["130", "150"], "properties": ["food, woman, speak", "wind, blow, zoom"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a faucet is running and a man is speaking", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "some men converse over an engine running"], "sample_ids": ["xyL9F5VrjkE", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["engine, run, wind", "men, converse, engine"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["continuous snoring", "bird squawks are accompanied by a man and woman speaking"], "sample_ids": ["sLkeqCDJIyw", "wqZ135Ssz0"], "start_seconds": ["120", "60"], "properties": ["loud, snoring, noise", "man, woman, squawks"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["paper folding and crinkling", "several insects fly while two men talk"], "sample_ids": ["zPpG3RD8lSs", "s-T9OVOiMLo"], "start_seconds": ["20", "330"], "properties": ["paper, fold, crinkle", "several, fly, men"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "a bird is chirping and tweeting a bird song"], "sample_ids": ["wIvYjuR3nrg", "wPz6QRAkEb4"], "start_seconds": ["9", "60"], "properties": ["birds, pigeons, vocalize", "chirps, tweets, song"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "a bird in a cage on top of a pole"], "captions_pred_audio": ["birds are chirping and cooing", 
"birds are chirping in the background "], "question": "which bird is singing", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "a male speaks and another male speaks"], "sample_ids": ["tK4VlLsNxak", "viuTg1M-dqg"], "start_seconds": ["120", "30"], "properties": ["a, dial, telephone", "two males, speaking, male"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "a muffled toilet flushes and the water drains"], "sample_ids": ["uZesmtKZGSw", "sfAvvZwdLCY"], "start_seconds": ["250", "20"], "properties": ["car, track, man", "flushes, drains, water"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a toilet is flushed"], "question": "which entity has water draining?", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["zhx6hoYrHeI", "y2bVZ7rz-5M"], "start_seconds": ["160", "280"], "properties": ["engine, sputter, rough", "motor 
noise, horn, siren"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "pigeons vocalize and birds chirp"], "sample_ids": ["zl9Dqx-j7q4", "uiS58TNyUiw"], "start_seconds": ["6", "430"], "properties": ["engine, laugh, loud", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a man driving a car in the dark", "of the pigeon in the cage"], "captions_pred_audio": ["a jet engine roars ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["continuous sneezing together with speech", "a duck quacks continuously"], "sample_ids": ["x4dZyf9Gbj0", "vh30P49Po6s"], "start_seconds": ["130", "30"], "properties": ["continuous, sneeze, speech", "quacks, continuously, duck"], "captions_pred_video": ["footage is blurry and out of focus", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman sneezes and speaks", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a helicopter engine idles continuously", "a car accelerates and wind blows"], "sample_ids": ["ugHJF0hfYkg", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["engine, idle, continuously", "accelerates, wind, blows"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "several beeps are followed by a hit and a woman talking"], "sample_ids": 
["w5W5Kqtc8E", "w34HjHr6gAY"], "start_seconds": ["100", "30"], "properties": ["wind, blow, vehicle", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "water flows and trickles"], "sample_ids": ["vbr9mHKc8WM", "tB7hWb9gTuQ"], "start_seconds": ["40", "30"], "properties": ["noise, loudness, engine", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["an engine is idling", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["water pouring and bubbling", "three men talk while wind blows and some liquid flows"], "sample_ids": ["uyRfq-jKPpo", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["water, bubbles, pouring", "three men, wind, flow"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["water is running from a faucet", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a liquid flowing?", "label": 1}, {"captions": ["someone whistles a song", "a man speaks as a car is passing by"], "sample_ids": ["sIXTftIuUgw", "sK4u5T8hW78"], "start_seconds": ["90", "30"], "properties": ["someone, song, whistle", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["xERFUeZONz8", "xfaoyyzw2WU"], "start_seconds": ["0", "180"], "properties": ["ring, approach, traffic", "loud, jet engine, roar"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["an emergency vehicle siren blares", "an aircraft engine roars and a man speaks "], "question": 
"which entity is louder", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "people applaud and hoot and chat quietly"], "sample_ids": ["tQWGZLItBXk", "wwyfGO2J4"], "start_seconds": ["170", "90"], "properties": ["music, person, ding", "people, applaud, hoot"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "people are clapping and speaking with background noise "], "question": "which entity has more people", "label": 1}, {"captions": ["a small engine spits as it runs", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sZvwOuuPGP0", "zj2R0XoFr5k"], "start_seconds": ["50", "50"], "properties": ["spits, engine, runs", "airplane, boy, fly"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a medium engine is running ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["food is frying while a woman speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["yhQ2Lg-7qDY", "xBxDz0CFVn0"], "start_seconds": ["130", "30"], "properties": ["food, woman, speak", "stream, water, flow"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage is blurry and out of focus"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["dogs barking and whimpering", "a baby cries and a woman speaks"], "sample_ids": ["tIY7qOV3rEM", "tMbMDvT50j8"], "start_seconds": ["0", "12"], "properties": ["barking, whimpering, dog", "a, cry, woman"], 
"captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["distant humming of an engine", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yVPZ2MNWpms", "wz7N8YRy74I"], "start_seconds": ["0", "30"], "properties": ["sound, distance, engine", "rooster, crow, background, men"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a car is driving by on the road ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster crow?", "label": 1}, {"captions": ["people speak and tapping occurs", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tFCUUGdREgA", "xfaoyyzw2WU"], "start_seconds": ["70", "180"], "properties": ["people, tap, speak", "loud, jet engine, roar"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "a child speaks in closed space"], "sample_ids": ["wPz6QRAkEb4", "yW6FWLSLkx4"], "start_seconds": ["60", "40"], "properties": ["chirps, tweets, song", "child, space, speak"], "captions_pred_video": ["a bird in a cage on top of a pole", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["birds are chirping in the background ", "a woman is speaking with background noise and breathing sounds "], 
"question": "which entity is speaking", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sNB8zxXneIM", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["several, quack, cocks", "music, gunfire, explosion"], "captions_pred_video": ["a group of geese in a cage", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zO-LSSY92ZM", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["liquid, surface, sound", "a woman, something, fried"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", "- a woman cooking in the kitchen"], "captions_pred_audio": ["steam is hissing and hissing", "a woman is speaking while food is frying in the 
background"], "question": "which entity is about cooking", "label": 1}, {"captions": ["scraping and female speech with distant music", "a man talks as something metal hits against and glass is set down"], "sample_ids": ["yHeVV-xeOxQ", "x6ijhqRY38s"], "start_seconds": ["130", "250"], "properties": ["female, speech, music", "something metal, glass, hit"], "captions_pred_video": ["of a girl milking a goat's udder", "a chef preparing a dish with a bottle of wine and a plate of food on a table"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a man is speaking and dishes are clanging "], "question": "which entity is about something hitting something?", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["x6ijhqRY38s", "wqZ135Ssz0"], "start_seconds": ["250", "60"], "properties": ["something metal, glass, hit", "two men, woman, birds"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "a vehicle engine accelerating then running on idle"], "sample_ids": ["zdYdyF9-m8U", "vYkA3cfXp5Q"], "start_seconds": ["7", "30"], "properties": ["wind, crash, shoreline", "engine, accelerate, idle"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["waves crash and wind blows ", "an engine is idling"], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a jet engine spools up and takes off", "three men talk while wind blows and some liquid flows"], "sample_ids": 
["vBslzh7saPw", "vJ7JPEFhyLA"], "start_seconds": ["90", "16"], "properties": ["engine, spools, takes", "three men, wind, flow"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a moving object", "label": 0}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vuUVPzd2FXw", "zFjIWfSD-4"], "start_seconds": ["160", "410"], "properties": ["a, steam, release", "People, motor, brakes"], "captions_pred_video": ["of the person cooking on the grill with a spatula", null], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a man talking?", "label": 0}, {"captions": ["an electric engine works nearby followed by a child talking", "a woman speaks as she rubs two objects together"], "sample_ids": ["xSKJGCItUWE", "vzxHnu-SFEw"], "start_seconds": ["10", "80"], "properties": ["engine, work, child", "two objects, woman, speak"], "captions_pred_video": ["footage of the helicopter flying in the room", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper 
cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a machine?", "label": 0}, {"captions": ["an airplane engine runs", "water flows and trickles"], "sample_ids": ["yVPZ2MNWpms", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["engine, airplane, runs", "water, flow, trickle"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a car is driving by on the road ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "birds chirp and objects are moved around"], "sample_ids": ["yswmmRZFItk", "yPUYU6t3rwo"], "start_seconds": ["0", "370"], "properties": ["background, frog, croak", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a close up of a frog in the water", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a frog is croaking", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a man talks as several small engines run", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["u9A6VZQCZpU", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["a, man, talk", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a man 
is speaking with wind noise in the background "], "question": "which entity is about a man talking?", "label": 0}, {"captions": ["a wooden clack accompanies nearby chirping birds", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yeFvk9x0wWI", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["clack, bird, chirp", "a woman, something, fried"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a vehicle engine accelerating then running on idle"], "sample_ids": ["ukxt9I7eMMg", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["continuous, woman, speaking", "engine, accelerate, idle"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["an airplane engine runs", "roadway noise occurs and a truck accelerates"], "sample_ids": ["yVPZ2MNWpms", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["engine, airplane, runs", "noise, truck, accelerate"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a car is driving by on the road ", "a car is driving on the road "], "question": "which is not a source of noise", "label": 0}, {"captions": ["a duck quacks loudly and continuously", "birds chirp and objects are moved around"], "sample_ids": ["vh30P49Po6s", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], 
"properties": ["loud, continuous, quacks", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a duck is quacking loudly", "insects buzz and a man speaks"], "question": "which entity is quieter", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "a drill drills through something then people begin laughing"], "sample_ids": ["xvDdE3zNf8Y", "tEE3MpBt1sg"], "start_seconds": ["120", "50"], "properties": ["a, female, speaks", "drill, something, laugh"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a woman speaks and crumples paper", "people are laughing breathing and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a infant makes noise and is excited"], "sample_ids": ["u21-Z5gJCB8", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["background, voice, man", "noise, excited, infant"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a person sniffles and sneezes", "people speak as gunfire rings out"], "sample_ids": ["uRlbY6aoBU", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["sneezes, sniffles, person", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", 
"label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "someone is typing on a computer keyboard"], "sample_ids": ["sZPuqDgX2V0", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["engine, accelerate, intercom", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a person is typing on a keyboard"], "question": "which entity is stationary", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "wind blows as people chatter quietly"], "sample_ids": ["rwtmaKiCcQU", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["nozzle, depressed, spray can", "wind, chatter, people"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "footage is blurry and out of focus"], "captions_pred_audio": ["spraying and people speaking", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone snores nearby", "a man speaks as a motor runs in the background"], "sample_ids": ["spJCm8tD9Zo", "xZepNM9qcRA"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "background, motor, run"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a person is snoring loudly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["children speak as a female ask them questions", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["wEBlkGWVWwE", "wDVMhEdTiVw"], "start_seconds": ["260", "30"], "properties": ["female, speak, questions", "gun, shoot, water"], "captions_pred_video": ["shows a person writing on the whiteboard", "a 
blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a game", "label": 1}, {"captions": ["an airplane engine runs", "a woman speaks as she rubs two objects together"], "sample_ids": ["yVPZ2MNWpms", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["engine, airplane, runs", "two objects, woman, speak"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a car is driving by on the road ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which object is moving", "label": 0}, {"captions": ["a female speaks softly as paper crinkles", "people speak as gunfire rings out"], "sample_ids": ["xvDdE3zNf8Y", "wqTCwqVRDlk"], "start_seconds": ["120", "80"], "properties": ["a, female, speaks", "gunfire, ring, speak"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a 
purple tie", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking and a gun is fired"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a goat bleats and someone makes a calling noise", "a clock ticktocks"], "sample_ids": ["vlS6YMeWAPo", "v-g-j2uTByM"], "start_seconds": ["40", "30"], "properties": ["noise, bleat, call", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a goat bleats and birds chirp", "a clock is ticking loudly"], "question": "which entity makes a ticktocks noise", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vZAw4apG0Es", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["background, clock, ticktocks", "female, spraying, scream"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a clock is ticking and people are talking", "a woman is speaking and a baby is crying"], "question": "which entity is more violent", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a train engine runs and a horn blows"], "sample_ids": ["wz7N8YRy74I", "zPX9o1uDiI"], "start_seconds": ["30", "40"], "properties": ["rooster, crow, background, people", "engine, horn, run"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", null], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a train moves with its horn blowing and wheels squealing "], "question": "which entity is a train", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a 
woman speaks happily and an animal chirps"], "sample_ids": ["sAam2NqGhLY", "uWAAAL4CIoc"], "start_seconds": ["20", "0"], "properties": ["snoring, breathing, child", "a woman, chirps, animal"], "captions_pred_video": ["of a little girl sleeping on a couch", null], "captions_pred_audio": ["a person is snoring", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "paper is crumpling consistently"], "sample_ids": ["t97k0cejSQE", "v5cSxLaHADY"], "start_seconds": ["250", "0"], "properties": ["bird, chirp, insect", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a bee on a purple thistle flower", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "an electric engine works nearby followed by a child talking"], "sample_ids": ["vimzuGQvdcU", "xSKJGCItUWE"], "start_seconds": ["30", "10"], "properties": ["a, man, yells", "engine, work, child"], "captions_pred_video": ["a group of people are rafting down a river", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a high pitched engine is running and a child speaks"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["goats bleat and metal clings", "a car speeding up in the distance"], "sample_ids": ["tH17JPjDPnc", "u0TrcHhkPQ"], "start_seconds": ["260", "20"], "properties": ["bleat, metal, clings", "distance, car, speed"], "captions_pred_video": ["feed of the goats eating hay in the barn", null], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a race car accelerates and revs its engine "], "question": "which entity is moving 
faster", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "an engine idles consistently before sputtering some"], "sample_ids": ["yRx9txMcBl0", "rwTERCUno"], "start_seconds": ["40", "90"], "properties": ["accelerates, tires, squeals", "engine, idle, sputter"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "an engine is idling and vibrating"], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["birds chirp as a train approaches", "a clock ticktocks"], "sample_ids": ["xM4joTqDVp4", "v-g-j2uTByM"], "start_seconds": ["160", "30"], "properties": ["bird, chirp, train", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a cat meows and children speak", "water flows and trickles"], "sample_ids": ["x5cuQjOdM3E", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["cat, speak, children", "water, flow, trickle"], "captions_pred_video": ["a black background with an airplane flying in the sky", "the rocks on the beach are surrounded by water and the sky is visible in the background"], 
"captions_pred_audio": ["a cat meows and a woman speaks", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a helicopter engine runs", "small dogs yip and bark sharply"], "sample_ids": ["t5ZbXbniOWk", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["engine, helicopter, run", "bark, yip, sharply"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a helicopter is flying overhead ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "paper is crumpling consistently"], "sample_ids": ["su6FAOcOA8c", "v5cSxLaHADY"], "start_seconds": ["4", "0"], "properties": ["engine, idle, woman", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "winds blows roughly as a vehicle races past"], "sample_ids": ["vbpKkWvfOu4", "xjvTpk2Zpr8"], "start_seconds": ["560", "70"], "properties": ["a, woman, man", "wind, blows, vehicle"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a jet engine roars and wind blows "], "question": "which 
entity is a person", "label": 0}, {"captions": ["birds chirp and a dog breathes heavily", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["y2ZBGpgbhHM", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["dog, chirp, breathe", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["birds chirping and a dog panting", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a machine engine runs and a man speaks"], "sample_ids": ["u21-Z5gJCB8", "vs65y4qmyBE"], "start_seconds": ["30", "340"], "properties": ["background, voice, man", "engine, run, man"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "a car is engulfed in flames on the side of the road"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a heavy engine is running and men are speaking "], "question": "which entity has a man speaking with another voice speaking in the background?", "label": 0}, {"captions": ["a woman speaks and other women and a man talk with her", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vbpKkWvfOu4", "tdWhHV3X25Q"], "start_seconds": ["560", "60"], "properties": ["a, woman, man", "applause, audience, yells"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking and a crowd is clapping"], "question": "which entity has more people", "label": 1}, 
{"captions": ["a female speaks softly as paper crinkles", "paper is crumpling consistently"], "sample_ids": ["xvDdE3zNf8Y", "v5cSxLaHADY"], "start_seconds": ["120", "0"], "properties": ["a, female, speaks", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman speaks and crumples paper", "paper is crumpled and crinkled"], "question": "which entity is crumpling consistently", "label": 1}, {"captions": ["a baby laugh at a sputter", "a stream of water runs briefly"], "sample_ids": ["sLUnaPT5gM8", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["laugh, sputter, baby", "stream, water, run"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["material crumbles into a microphone", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["vofpvUo6NAw", "ziUT9IFTkjg"], "start_seconds": ["220", "10"], "properties": ["material, crumbles, microphone", "background, birds, rustling"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", null], "captions_pred_audio": ["paper is being crumpled and crinkled", "birds are chirping and a chime is ringing "], "question": "which entity is more likely to be found in a forest", "label": 1}, {"captions": ["some men converse over an engine running", "insects humming with a dog barking and small goat bleating"], "sample_ids": ["sCiy7QS1U", "tIY7qOV3rEM"], "start_seconds": ["300", "0"], "properties": ["men, converse, engine", "animal, bark, dog, barking, small, goat, bleating"], "captions_pred_video": [null, "a 
dog is standing in the middle of a dirt road in the woods"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a dog is barking and a cat is meowing"], "question": "which animal is barking", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "a steam engine runs and whistles as it passes by"], "sample_ids": ["u5RmF3c3Aw", "se87d6yxEOA"], "start_seconds": ["60", "10"], "properties": ["engine, car, zoom", "run, whistle, pass"], "captions_pred_video": [null, "footage of a train passing by a train station with smoke billowing out of the train's smokestack"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a train is moving and blowing its whistle "], "question": "which entity is a steam engine?", "label": 1}, {"captions": ["a cat meows and children speak", "a man speaks as a car is passing by"], "sample_ids": ["x5cuQjOdM3E", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["cat, speak, children", "a, car, pass"], "captions_pred_video": ["a black background with an airplane flying in the sky", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vf44CgrjT0A", "uZesmtKZGSw"], "start_seconds": ["20", "250"], "properties": ["loud, long, person", "men, talk, cars"], 
"captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a loud burp", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "water flows and trickles"], "sample_ids": ["vVhthZ45k3Y", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["cat, purr, hiss", "water, flow, trickle"], "captions_pred_video": ["footage is blurry and out of focus", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "an infant crying as a woman laughs"], "sample_ids": ["vW4x7S1VfQc", "xhmRY9yhC7c"], "start_seconds": ["150", "20"], "properties": ["clacking, oil, woman", "a, laugh, infant"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["food sizzles in a frying pan", "a baby cries and a woman speaks"], "question": "which entity is about a woman talking?", "label": 0}, {"captions": ["leaves rustle while man speaks", "a steam engine runs and whistles as it passes by"], 
"sample_ids": ["zOZleIRqZm4", "se87d6yxEOA"], "start_seconds": ["80", "10"], "properties": ["leaves, rustle, speak", "run, whistle, pass"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a train passing by a train station with smoke billowing out of the train's smokestack"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a train is moving and blowing its whistle "], "question": "which entity is moving", "label": 1}, {"captions": ["a car accelerates and wind blows", "an engine runs loudly"], "sample_ids": ["u0TrcHhkPQ", "vqZuVbG6-HI"], "start_seconds": ["20", "130"], "properties": ["accelerates, wind, blows", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "paper is crumpling consistently"], "sample_ids": ["w0xsN8X18Y", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["music, surface, rain", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a horn honks and then loudly blares", "a loud engine muffles a man as he speaks"], "sample_ids": ["wnpJndXuxLc", "xyx6eNVEYRY"], "start_seconds": ["50", "380"], "properties": ["horn, honk, loud", "loud, engine, muffles"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of a helicopter landing on a runway at an airport"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the 
background ", "an aircraft engine is running and a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["ylpYOorfH4o", "zj2R0XoFr5k"], "start_seconds": ["410", "50"], "properties": ["engine, running, wind", "airplane, boy, fly"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "water splashes as an animal walks through"], "sample_ids": ["vbZ-0lGPneg", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["a woman, a television program, a bird", "animal, water, splashes"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["humming and rattling of an engine idling 
as it revs", "a vehicle accelerates and squeals tires"], "sample_ids": ["xMXvkIcaG0Y", "yRx9txMcBl0"], "start_seconds": ["30", "40"], "properties": ["sound, humming, rattling", "accelerates, tires, squeals"], "captions_pred_video": ["footage of a car's hood being opened up to reveal the engine underneath the hood", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["an engine is revving and accelerating ", "a car is revving its engine and skidding "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xfaoyyzw2WU", "tDVADusiIoc"], "start_seconds": ["180", "60"], "properties": ["loud, jet engine, roar", "water, radio, man"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which is louder", "label": 1}, {"captions": ["a helicopter engine runs continuously", "pigeons vocalize and birds chirp"], "sample_ids": ["ugHJF0hfYkg", "uiS58TNyUiw"], "start_seconds": ["10", "430"], "properties": ["engine, running, continuously", "vocalize, bird, chirp"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of the pigeon in the cage"], 
"captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["water flows as a woman laughs and a man speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vddP56-ogds", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["water, flow, laugh", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "an infant crying as a woman laughs"], "sample_ids": ["vf44CgrjT0A", "xhmRY9yhC7c"], "start_seconds": ["20", "20"], "properties": ["loud, long, person", "a, laugh, infant"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a loud burp", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "a car speeding up in the distance"], "sample_ids": ["s6DESzUTGjY", "u0TrcHhkPQ"], "start_seconds": ["16", "20"], "properties": ["wind, laugh, woman", "distance, car, speed"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube 
how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", null], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["animals bleat and cry out and then a woman speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yZp6xizR0yU", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["animal, bleat, cry", "a woman, something, fried"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a woman is speaking while food is frying in the background"], "question": "which entity is about a woman talking?", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yLy-WycbVVE", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["background, people, talk", "engine, laugh, loud"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "a man sprays as a scraping occurs in the background"], "sample_ids": ["uYT5gxnyMWM", "sOa7g-44Dag"], "start_seconds": ["50", "30"], "properties": ["female, spraying, scream", "background, man, spray"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation 
in an attic"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and rubbing his hands together "], "question": "which entity has a female spraying?", "label": 0}, {"captions": ["white noise and birds chirping", "a man speaks followed by another man speaking outside"], "sample_ids": ["wRBHTgrbiwg", "viuTg1M-dqg"], "start_seconds": ["50", "30"], "properties": ["noise, white, chirping", "two men, speak, follow"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "people cheer as a vehicle engine revs"], "sample_ids": ["uEU-Hg5MTN8", "xjhAnI2q6hM"], "start_seconds": ["27", "6"], "properties": ["a woman, laughs, animal", "engine revs, vehicle, people"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a truck is revving its engine and a man is speaking "], "question": "which entity has more people", "label": 1}, {"captions": ["a horse runs while two women talk", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sdvI1mHAsc", "sSMl2vc3ek"], "start_seconds": ["20", "20"], "properties": ["two women, horse, run", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "a person snores loudly multiple times at a close distance"], "sample_ids": 
["wDVMhEdTiVw", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["gun, shoot, water", "loud, multiple, distance"], "captions_pred_video": ["a blurry image of trees and water in the forest", null], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "someone snores nearby"], "sample_ids": ["zofjfKhqLk8", "spJCm8tD9Zo"], "start_seconds": ["10", "90"], "properties": ["background, metal, clings", "someone snores, nearby, someone"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wztCSUxOf8", "sLUnaPT5gM8"], "start_seconds": ["130", "0"], "properties": ["a crowd, yells, applauds", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["dogs barking and whimpering", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tIY7qOV3rEM", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["barking, whimpering, dog", "female, spraying, scream"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a woman is speaking and a baby is crying"], "question": 
"which entity is a person", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "a toilet flushes and a female speaks"], "sample_ids": ["xyx6eNVEYRY", "yaln9y8I7ms"], "start_seconds": ["380", "230"], "properties": ["loud, engine, muffles", "female, flushes, toilet"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "footage is blurry and out of focus"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sfAvvZwdLCY", "y8WEcpOlT3I"], "start_seconds": ["20", "40"], "properties": ["water drains, flushes, water", "harsh, wind, blows"], "captions_pred_video": ["footage of the toilet in the bathroom", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking with wind noise in the background "], "question": "which entity is a source of water", "label": 0}, {"captions": ["dogs barking and whimpering", "an infant crying frantically"], "sample_ids": ["tIY7qOV3rEM", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["barking, whimpering, dog", "cry, infant, frantically"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "of the baby crying in the car seat"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "birds chirp and an insect buzzes around"], "sample_ids": ["tZGN5a7ybxo", "t97k0cejSQE"], "start_seconds": ["60", "250"], "properties": ["ring, train, horn", "bird, chirp, insect"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "a bee on a purple thistle flower"], 
"captions_pred_audio": ["a train is moving and blowing its horn ", "a bee buzzes and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a train engine runs and a horn blows", "a vehicle accelerates and squeals tires"], "sample_ids": ["zPX9o1uDiI", "yRx9txMcBl0"], "start_seconds": ["40", "40"], "properties": ["engine, horn, run", "accelerates, tires, squeals"], "captions_pred_video": [null, "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a car is revving its engine and skidding "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "birds chirp and objects are moved around"], "sample_ids": ["u21-Z5gJCB8", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["background, voice, man", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["shmR4OZtzqA", "yajyRTUQk3U"], "start_seconds": ["30", "400"], 
"properties": ["man, engine, idle", "a woman, something, fried"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man speaks while a motor runs", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "a telephone rings followed by a woman talking"], "sample_ids": ["wjsXBsc7M40", "tGcFnX0GHI"], "start_seconds": ["10", "0"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "ring, talk, woman"], "captions_pred_video": ["footage of the baby playing with a toothbrush", null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "a man speaks as a car is passing by"], "sample_ids": ["zCrAfDfv6-A", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["person, mouse, click", "a, car, pass"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person whistles a song", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a person clicking a mouse?", "label": 0}, {"captions": ["wind blows and people scream while an engine revs", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["w5W5Kqtc8E", "vbZ-0lGPneg"], "start_seconds": ["100", "30"], "properties": ["wind, engine, scream", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking and a dog is whimpering"], "question": "which entity is more calm", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "someone whistles a tune"], "sample_ids": ["xjhAnI2q6hM", "sIXTftIuUgw"], "start_seconds": ["6", "90"], "properties": ["wind, blow, loudly", "someone, tune, whistle"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", null], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a person whistling a song"], "question": "which is a musical instrument", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "paper is crumpling consistently"], "sample_ids": ["vqZuVbG6-HI", "v5cSxLaHADY"], "start_seconds": ["130", "0"], "properties": ["background, male, female", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage is blurry because it's raining 
outside", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a door slams shut roughly", "a woman and man are speaking"], "sample_ids": ["zkKdxzNC97Y", "vbpKkWvfOu4"], "start_seconds": ["27", "560"], "properties": ["a door, slams, shut", "two people, speaking, woman, man"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a door is opened and closed", "a woman is speaking and a man is speaking"], "question": "which entity is a person", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["y2bVZ7rz-5M", "ziUT9IFTkjg"], "start_seconds": ["280", "10"], "properties": ["motor noise, horn, siren", "background, birds, rustling"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "water is sprayed across a hard surface"], "sample_ids": ["sapQIQUhFc", "sQwlkXjQabo"], "start_seconds": ["280", "10"], "properties": ["liquid, flow, distance", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "spraying 
followed by silence"], "question": "which entity is a spray of water?", "label": 1}, {"captions": ["someone snores nearby", "paper folding and crinkling"], "sample_ids": ["spJCm8tD9Zo", "zPpG3RD8lSs"], "start_seconds": ["90", "20"], "properties": ["someone snores, nearby, someone", "paper, fold, crinkle"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a person is snoring loudly", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["xyx6eNVEYRY", "vzxHnu-SFEw"], "start_seconds": ["380", "80"], "properties": ["loud, engine, muffles", "two objects, woman, speak"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to 
make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which object is rubbing together?", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a man speaks as a car is passing by"], "sample_ids": ["vddP56-ogds", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["water, flow, laugh", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["xBxDz0CFVn0", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["wind, chatter, people", "harsh, wind, blows"], "captions_pred_video": 
["footage is blurry and out of focus", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 0}, {"captions": ["people speak in a closed space", "a woman speaks happily and an animal chirps"], "sample_ids": ["sTpirNYo8vQ", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["people, space, speak", "a woman, chirps, animal"], "captions_pred_video": ["of a man taking a selfie on a bus", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a woman is speaking and a dog is barking "], "question": "which entity is more active", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sZPuqDgX2V0", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["engine, accelerate, intercom", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["wind blowing followed by a zoom", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vr8ZXjEBhMQ", "vbZ-0lGPneg"], "start_seconds": ["150", "30"], "properties": ["wind, blow, zoom", "a woman, a television program, a bird"], "captions_pred_video": ["is taken from a motorcycle's point of view", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a video of a natural phenomenon?", "label": 0}, {"captions": ["a woman talks while something is fried and 
objects are tapped", "a car accelerates and wind blows"], "sample_ids": ["yajyRTUQk3U", "u0TrcHhkPQ"], "start_seconds": ["400", "20"], "properties": ["a woman, something, fried", "accelerates, wind, blows"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a man speaks as a car is passing by"], "sample_ids": ["vW4x7S1VfQc", "sK4u5T8hW78"], "start_seconds": ["150", "30"], "properties": ["clacking, oil, woman", "a, car, pass"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["food sizzles in a frying pan", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["xyx6eNVEYRY", "wwyfGO2J4"], "start_seconds": ["380", "90"], "properties": ["loud, engine, muffles", "people, applaud, hoot"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", null], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "people are clapping and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a person snoring", "a gun shoots, 
followed by water sloshing nearby"], "sample_ids": ["t8tv5YRMJUg", "wDVMhEdTiVw"], "start_seconds": ["0", "30"], "properties": ["a person, snore, loud", "gun, shoot, water"], "captions_pred_video": ["of a man getting his face licked by another man", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks with water running", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["wTideSjRFS0", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["water, running, woman", "male, duck, laugh"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking and ducks are quacking"], "question": "which entity has a duck in it?", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "people speak as gunfire rings out"], "sample_ids": ["wTideSjRFS0", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["food, sizzle, woman", "gunfire, ring, speak"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a horse runs while two women talk", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sdvI1mHAsc", "wDVMhEdTiVw"], "start_seconds": ["20", "30"], "properties": ["two women, horse, run", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a gun is fired 
followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["uEU-Hg5MTN8", "vlS6YMeWAPo"], "start_seconds": ["27", "40"], "properties": ["a woman, laughs, animal", "sheep, baa, birds"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a goat bleats and birds chirp"], "question": "which entity has more animals", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "birds chirp and objects are moved around"], "sample_ids": ["u6jIvCtKarQ", "yPUYU6t3rwo"], "start_seconds": ["70", "370"], "properties": ["a, man, speaks", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a person using a blender on a stove top", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "a telephone rings and a bird vocalizes"], "sample_ids": ["wRBHTgrbiwg", "skd2PphS6oI"], "start_seconds": ["50", "190"], "properties": ["birds, chirp, cooing", "ring, bird, vocalize"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a telephone bell rings repeatedly "], "question": "which entity is a bird vocalizing?", "label": 1}, {"captions": ["a person is snoring while sleeping", "a man speaks while turning a water faucet 
on"], "sample_ids": ["vJrjSeP17yE", "vf9xf3vMsGM"], "start_seconds": ["40", "540"], "properties": ["a person is sleeping, snoring, person", "A man speaks while turning a water faucet on."], "captions_pred_video": ["a black background with a small plane flying in the sky", "of the person washing their hands under the faucet"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking while water is running in the background"], "question": "which entity is a man?", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a toilet flushes and a female speaks"], "sample_ids": ["yYEVLuqEytU", "yaln9y8I7ms"], "start_seconds": ["40", "230"], "properties": ["grunt, slurp, background", "female, flushes, toilet"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage is blurry and out of focus"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["some men converse over an engine running", "some tunes played by whistling"], "sample_ids": ["sCiy7QS1U", "u6BnG6YZqJ4"], "start_seconds": ["300", "0"], "properties": ["men, converse, engine", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "paper is crumpling consistently"], "sample_ids": ["sYITalLZjj4", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["water, rushes, background, birds", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["two ducks are swimming in the water near each other", "footage of the person holding a pair of scissors"], 
"captions_pred_audio": ["wind blows and birds chirp", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "a woman speaks as she rubs two objects together"], "sample_ids": ["un9VQlzgZM", "vzxHnu-SFEw"], "start_seconds": ["5", "80"], "properties": ["females, talk, laugh", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a woman speaking?", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a young woman speaks over spraying and another person yells"], "sample_ids": ["rqfQRErjfk8", "uYT5gxnyMWM"], "start_seconds": ["170", "50"], "properties": ["crowd, cheers, applauds", "person, spray, yell"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a crowd of people clapping 
and cheering", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a train horn blares as a train passes, then fades"], "sample_ids": ["xKB8O8LTs6s", "zVacuqSb4LI"], "start_seconds": ["70", "30"], "properties": ["music, gunfire, explosion", "blares, fades, train"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is more calm", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "water pouring and bubbling"], "sample_ids": ["vdoxuJn9lTc", "uyRfq-jKPpo"], "start_seconds": ["40", "50"], "properties": ["burp, loud, girl", "water, bubbles, pouring"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a child speaks followed by a burp", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a train engine runs and a horn blows", "a toilet flushes and a female speaks"], "sample_ids": ["zPX9o1uDiI", "yaln9y8I7ms"], "start_seconds": ["40", "230"], "properties": ["engine, horn, run", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a toilet flushes and a man speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "a car accelerates and wind blows"], "sample_ids": ["zsLxS-uLJTw", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["horn, blast, train", "accelerates, wind, blows"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", null], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["male speech with light ticking", "small dogs growl, bark and yip."], "sample_ids": ["xO-Q2BlIIPU", "sShpyu2l4YQ"], "start_seconds": ["30", "0"], "properties": ["male, speech, ticking", "growl, bark, yip"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "the puppies 
are playing with a toy"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a dog is barking and growling"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["t97k0cejSQE", "vfYTJq7nU"], "start_seconds": ["250", "130"], "properties": ["bird, chirp, insect", "rustling, ducks, quack"], "captions_pred_video": ["a bee on a purple thistle flower", null], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a duck quacks and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "several insects fly while two men talk"], "sample_ids": ["vzxHnu-SFEw", "s-T9OVOiMLo"], "start_seconds": ["80", "330"], "properties": ["two objects, woman, speak", "several, fly, men"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "a man climbing up a tree using a rope to reach the top of the tree"], 
"captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a woman speaking as she rubs two objects together?", "label": 0}, {"captions": ["someone is snoring while sleeping", "paper folding and crinkling"], "sample_ids": ["ujMt0-D-x2k", "zPpG3RD8lSs"], "start_seconds": ["0", "20"], "properties": ["snore, sleep, someone", "paper, fold, crinkle"], "captions_pred_video": ["of the dog playing with a toy on the floor", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a person is snoring loudly", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "a woman speaks as she rubs two objects together"], "sample_ids": ["vuUVPzd2FXw", "vzxHnu-SFEw"], "start_seconds": ["160", "80"], "properties": ["a, steam, release", "two objects, woman, speak"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to 
make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a woman is speaking and breathing with mechanisms in the background "], "question": "which object is being rubbed together", "label": 0}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "several insects fly while two men talk"], "sample_ids": ["wsHBIgzs9Fs", "s-T9OVOiMLo"], "start_seconds": ["50", "330"], "properties": ["horn, continuous, buzzing", "several, fly, men"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more likely to be in a zoo", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a flush is followed by gurgling water, then another flush"], "sample_ids": ["v7jJS8aAyA", "tqR406bGiE"], "start_seconds": ["10", "40"], "properties": ["wind, blows, loudly", "flush, water, gurgle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorcycle 
engine is idling and vibrating", "a toilet is flushed"], "question": "which entity is silent", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "several insects fly while two men talk"], "sample_ids": ["xvDdE3zNf8Y", "s-T9OVOiMLo"], "start_seconds": ["120", "330"], "properties": ["a, female, speaks", "several, fly, men"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more insects", "label": 1}, {"captions": ["speaking following by laughing and clapping", "someone snores nearby"], "sample_ids": ["u2f5NpsoHBg", "spJCm8tD9Zo"], "start_seconds": ["30", "90"], "properties": ["person, laugh, clap", "someone snores, nearby, someone"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a person is snoring loudly"], "question": "which person is speaking", "label": 0}, {"captions": ["a person burps loudly for a long time nearby", "water is sprayed across a hard surface"], "sample_ids": ["vf44CgrjT0A", "sQwlkXjQabo"], "start_seconds": ["20", "10"], "properties": ["loud, long, person", "water, spray, surface"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a loud burp", "spraying followed by silence"], "question": "which entity is a spray?", "label": 1}, {"captions": ["a consistent ticking pattern", "a large crowd cheers and applauds"], "sample_ids": ["sCeWURVHfOM", "rqfQRErjfk8"], "start_seconds": ["30", "170"], "properties": ["ticking, pattern, clock", "crowd, cheers, applauds"], 
"captions_pred_video": ["- a close-up view of the clock's inner workings", "a man hugging another man in front of an orchestra"], "captions_pred_audio": ["ticking of a clock", "a crowd of people clapping and cheering"], "question": "which entity is more likely to be a clock", "label": 0}, {"captions": ["food is frying while a woman speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["yhQ2Lg-7qDY", "sQGXqGcwOTc"], "start_seconds": ["130", "3"], "properties": ["food, woman, speak", "cling, speak, dishes"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a faucet is running and a man is speaking", "mechanisms are operating and water is splashing "], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a person sniffles and then sneezes in the distance", "people cheer as a vehicle engine revs"], "sample_ids": ["uRlbY6aoBU", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["a, distance, sneeze", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is sneezing ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["people speak and tapping occurs", "water flows as a woman laughs and a man speaks"], "sample_ids": ["tFCUUGdREgA", "vddP56-ogds"], "start_seconds": ["70", "30"], "properties": ["people, tap, speak", "water, flow, laugh"], "captions_pred_video": ["a person riding a white horse in an indoor arena", null], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "water is running and gurgling and a man is speaking"], "question": "which entity is a video of a person speaking and tapping?", "label": 0}, {"captions": ["a woman speaks as she rubs two objects 
together", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vzxHnu-SFEw", "ukg5L09Wpvo"], "start_seconds": ["80", "150"], "properties": ["two objects, woman, speak", "clickety-clack, train, whistle"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a male speaks and another male speaks", "a man speaks as a car is passing by"], "sample_ids": ["viuTg1M-dqg", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["two males, speaking, male", "a, car, pass"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["a man speaks as crickets sing", "three men talk while wind blows and some liquid flows"], "sample_ids": ["ryFDPxgDOGc", "vJ7JPEFhyLA"], "start_seconds": ["570", "16"], "properties": ["a, crickets, sing", "three men, wind, flow"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["an aircraft engine runs", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yLCORCnd35Q", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["engine, aircraft, runs", "People, motor, brakes"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", null], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 1}, {"captions": ["a man is filing a hard object", "several insects fly while two men talk"], "sample_ids": ["vveS8HT7Uog", "s-T9OVOiMLo"], "start_seconds": ["100", "330"], "properties": ["a man, hard, object", "several, fly, men"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is filing 
and speaking with background noise and breathing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a man filing a hard object?", "label": 0}, {"captions": ["a woman speaks in a fast tone with a male", "someone whistles a tune"], "sample_ids": ["sTpirNYo8vQ", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["a, tone, fast", "someone, tune, whistle"], "captions_pred_video": ["of a man taking a selfie on a bus", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "a clock ticktocks"], "sample_ids": ["zj2R0XoFr5k", "v-g-j2uTByM"], "start_seconds": ["50", "30"], "properties": ["airplane, fly, woman", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "roadway noise occurs and a truck accelerates"], "sample_ids": ["zkKdxzNC97Y", "tgbONvsP47Y"], "start_seconds": ["27", "0"], "properties": ["loud, bang, noise", "noise, truck, accelerate"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a door is opened and closed", "a car is driving on the road "], "question": "which noise is made by a truck", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["smDKStoHBJo", "vbZ-0lGPneg"], 
"start_seconds": ["0", "30"], "properties": ["a, talk, baby, cry", "a woman, a television program, a bird"], "captions_pred_video": ["a man holding a crying baby in his arms", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and a dog is whimpering"], "question": "which entity has a baby?", "label": 0}, {"captions": ["an airplane engine roars increasingly louder", "a duck quacks continuously"], "sample_ids": ["vBslzh7saPw", "vh30P49Po6s"], "start_seconds": ["90", "30"], "properties": ["engine, roar, louder", "quacks, continuously, duck"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "a car speeding up in the distance"], "sample_ids": ["y1saVTXsKwc", "u0TrcHhkPQ"], "start_seconds": ["80", "20"], "properties": ["a, dog, talk", "distance, car, speed"], "captions_pred_video": ["a dog playing with a pink ball", null], "captions_pred_audio": ["a dog barks and a man speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "the revving of an engine throttle followed by a man speaking"], "sample_ids": ["vBHyYJ8pL0", "tezvROoo4bs"], "start_seconds": ["2", "40"], "properties": ["noise, door, opening", "audio, throttle, speaking"], "captions_pred_video": [null, "footage of a busy city street with cars parked on both sides of the road"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a car accelerates and revs while a man speaks "], "question": "which entity is accompanied by a door opening and closing", "label": 0}, 
{"captions": ["a male is speaking and a duck quacks as others laugh", "a man speaks as a car is passing by"], "sample_ids": ["tiDFTC-5vU", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["male, duck, laugh", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a duck in it?", "label": 0}, {"captions": ["multiple birds vocalize and wind blows", "a man speaks as a motor runs in the background"], "sample_ids": ["uoGVs9yUqY4", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["multiple, vocalize, wind", "background, motor, run"], "captions_pred_video": ["for how to make a wooden shed door youtube", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a man speaking to a motor?", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "a woman speaks as she rubs two objects together"], "sample_ids": ["sOa7g-44Dag", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["audio, scratching, man", "two objects, woman, speak"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a child speaks in closed space"], "sample_ids": ["ugHJF0hfYkg", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["engine, running, continuously", "child, space, speak"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is not running continuously", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sOa7g-44Dag", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["background, man, spray", "two men, woman, birds"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", null], 
"captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a duck quacks continuously"], "sample_ids": ["wz7N8YRy74I", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, people", "quacks, continuously, duck"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a duck is quacking loudly"], "question": "which animal is speaking", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "a train horn blows as it passes by"], "sample_ids": ["slZLHwNbbt4", "zVacuqSb4LI"], "start_seconds": ["300", "30"], "properties": ["train, horn, sound", "horn, blows, train"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a train 
whistle blows and a train passes by with a whistle blowing "], "question": "which train is blowing its horn?", "label": 1}, {"captions": ["a motorcycle engine is idling", "people speak as gunfire rings out"], "sample_ids": ["vZAqdHZ81yA", "wqTCwqVRDlk"], "start_seconds": ["180", "80"], "properties": ["engine, motorcycle, idling", "gunfire, ring, speak"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["an engine is idling loudly", "a man is speaking and a gun is fired"], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "a clock ticktocks"], "sample_ids": ["sofxkNWaP0s", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["wind, engine, louder", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "pigeons vocalize and birds chirp"], "sample_ids": ["yeFvk9x0wWI", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["chirp, twitter, clatter", "vocalize, bird, chirp"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "of the pigeon in the cage"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["leaves rustle while man speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["zOZleIRqZm4", "xZepNM9qcRA"], "start_seconds": ["80", "30"], "properties": ["leaves, rustle, 
speak", "background, motor, run"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["wqUmIEzuNz4", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["frog, bird, vocalize", "two men, woman, birds"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", null], "captions_pred_audio": ["a cat meows and rustles", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a human activity", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a drill runs and two people laugh"], "sample_ids": ["sa6TLVbooCc", "tEE3MpBt1sg"], "start_seconds": ["240", "50"], "properties": ["people, laugh, child", "two people, laugh, drill"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "people are laughing breathing and speaking with background noise "], "question": "which entity shows a drill running?", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sAam2NqGhLY", "yajyRTUQk3U"], "start_seconds": ["20", "400"], "properties": ["snoring, breathing, child", "a woman, something, fried"], "captions_pred_video": ["of a little girl sleeping on a couch", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person is snoring", "a woman is speaking while food is frying in the 
background"], "question": "which entity is a person", "label": 1}, {"captions": ["a person speaks over rustling leaves", "a horn honks and then loudly blares"], "sample_ids": ["zOZleIRqZm4", "wnpJndXuxLc"], "start_seconds": ["80", "50"], "properties": ["rustling, leaves, person", "horn, honk, loud"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a stream of water runs briefly", "paper folding and crinkling"], "sample_ids": ["x-PeY8Yb8M4", "zPpG3RD8lSs"], "start_seconds": ["300", "20"], "properties": ["stream, water, run", "paper, fold, crinkle"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a car is driving on a wet road ", "the wind blows and a mouse clicks "], "question": "which entity is not a stream of water?", "label": 1}, {"captions": ["an airplane engine roars 
increasingly louder", "an airplane engine spools and people speak"], "sample_ids": ["vBslzh7saPw", "wTjoRj1se3U"], "start_seconds": ["90", "390"], "properties": ["engine, roar, louder", "airplane, engine, spool"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a jet engine is running and people are talking"], "question": "which entity is a video of an airplane engine?", "label": 0}, {"captions": ["a child speaks in closed space", "a telephone rings followed by a woman talking"], "sample_ids": ["yW6FWLSLkx4", "tGcFnX0GHI"], "start_seconds": ["40", "0"], "properties": ["child, space, speak", "ring, talk, woman"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", null], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["humming and rattling of an engine idling as it revs", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["xMXvkIcaG0Y", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["sound, humming, rattling", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a car's hood being opened up to reveal the engine underneath the hood", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["an engine is revving and accelerating ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "water flows and trickles"], "sample_ids": ["yFB25fqfU8I", "tB7hWb9gTuQ"], "start_seconds": ["300", "30"], "properties": ["wave, crash, shoreline", "water, flow, trickle"], 
"captions_pred_video": ["footage of a person surfing in the ocean", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["someone is snoring while sleeping", "some tunes played by whistling"], "sample_ids": ["ujMt0-D-x2k", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["snore, sleep, someone", "tune, play, whistling"], "captions_pred_video": ["of the dog playing with a toy on the floor", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a person is snoring loudly", "a person whistling a song"], "question": "which entity is not playing a tune?", "label": 0}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a clock ticktocks"], "sample_ids": ["su6FAOcOA8c", "v-g-j2uTByM"], "start_seconds": ["4", "30"], "properties": ["engine, run, woman", "ticktocks, clock, ticktocks"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vddP56-ogds", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["water, flow, laugh", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows water flowing?", "label": 0}, {"captions": ["birds twitter and 
chirp and clatter", "birds chirp as a bell rings"], "sample_ids": ["yeFvk9x0wWI", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["chirp, twitter, clatter", "chirp, bell, ring"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", null], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "a infant makes noise and is excited"], "sample_ids": ["wsHBIgzs9Fs", "wIJK3-5y0kA"], "start_seconds": ["50", "30"], "properties": ["horn, continuous, buzzing", "noise, excited, infant"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "a clock ticktocks"], "sample_ids": ["sapQIQUhFc", "v-g-j2uTByM"], "start_seconds": ["280", "30"], "properties": ["liquid, flow, distance", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "a person is burping while a girl speaks"], "sample_ids": ["zliInBdC98Y", "vdoxuJn9lTc"], "start_seconds": ["30", "40"], "properties": ["a, baby, cries, wails", "person, burp, girl"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "a group of young girls playing a video game together in a living room"], 
"captions_pred_audio": ["a baby cries and a woman speaks", "a child speaks followed by a burp"], "question": "which entity is a person?", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sTpirNYo8vQ", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["a, tone, fast", "two men, woman, birds"], "captions_pred_video": ["of a man taking a selfie on a bus", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["bees buzz and wind blows", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["tMJne1a4AFI", "yDoT73BWsdA"], "start_seconds": ["0", "10"], "properties": ["bees buzz, wind blows, bees", "engine, revs, vehicle"], "captions_pred_video": ["a swarm of bees on the ground", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a swarm of bees buzzing around", "a race car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["people speak in a closed space", "an airplane engine spools and people speak"], "sample_ids": ["sTpirNYo8vQ", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["people, space, speak", "airplane, engine, spool"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a jet engine is running and people are talking"], "question": "which entity is a video", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "water pouring and bubbling"], "sample_ids": ["yks4cLgIDMc", "uyRfq-jKPpo"], "start_seconds": ["170", "50"], "properties": 
["background, speaking, child", "water, bubbles, pouring"], "captions_pred_video": ["footage of two kids wrestling on the floor", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and a child is crying", "water is running from a faucet"], "question": "which entity is more likely to be in a bathroom", "label": 1}, {"captions": ["tapping occurs then a baby cries", "a infant makes noise and is excited"], "sample_ids": ["wIJK3-5y0kA", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["a, cry, baby", "noise, excited, infant"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a baby cries and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity is a baby?", "label": 0}, {"captions": ["a toilet flushes and water drains", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["sfAvvZwdLCY", "xfaoyyzw2WU"], "start_seconds": ["20", "180"], "properties": ["water drains, flushes, water", "loud, jet engine, roar"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a toilet is flushed", "an aircraft engine roars and a man speaks 
"], "question": "which entity is louder", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a infant makes noise and is excited"], "sample_ids": ["xjhAnI2q6hM", "wIJK3-5y0kA"], "start_seconds": ["6", "30"], "properties": ["engine revs, vehicle, people", "noise, excited, infant"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a baby cries and a woman speaks"], "question": "which entity is more excited", "label": 1}, {"captions": ["electronic beeps occur in a short series", "water splashes as an animal walks through"], "sample_ids": ["y682ml90jGw", "w1ir-sZ3Im8"], "start_seconds": ["11", "90"], "properties": ["beeps, series, electronic", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a beeping sound is being made ", "water splashes and gurgles as people speak"], "question": "which entity is more natural", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "water splashes as an animal walks through"], "sample_ids": ["wIvYjuR3nrg", "w1ir-sZ3Im8"], "start_seconds": ["9", "90"], "properties": ["birds, pigeons, vocalize", "animal, water, splashes"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["birds are chirping and cooing", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["uYT5gxnyMWM", "ziUT9IFTkjg"], "start_seconds": ["50", "10"], "properties": ["a, scream, girl", "background, birds, 
rustling"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a child speaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yW6FWLSLkx4", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["a, child, speaks", "People, motor, brakes"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", null], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vW4x7S1VfQc", "zj2R0XoFr5k"], "start_seconds": ["150", "50"], "properties": ["clacking, oil, woman", "airplane, boy, fly"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["food sizzles in a frying pan", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["material crumbles into a microphone", "birds chirp and objects are moved around"], "sample_ids": ["vofpvUo6NAw", "yPUYU6t3rwo"], "start_seconds": ["220", "370"], "properties": ["material, crumbles, microphone", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["paper is being crumpled and crinkled", "insects buzz and a man 
speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "a person is snoring while sleeping"], "sample_ids": ["tPJvjq9QePY", "vJrjSeP17yE"], "start_seconds": ["40", "40"], "properties": ["animal, bleat, moo", "a person is sleeping, snoring, person"], "captions_pred_video": ["a dog and a sheep in a barn", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a baby cries and a man speaks", "a person snoring loudly"], "question": "which entity is not a person?", "label": 0}, {"captions": ["birds chirp quietly and an adult man speaks", "a duck quacks continuously"], "sample_ids": ["zuua6-5goWw", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["birds, chirp, quiet, man, speaks", "quacks, continuously, duck"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 0}, {"captions": ["a frog croaks as other frogs croak in the background", "a toilet flushes and a female speaks"], "sample_ids": ["yswmmRZFItk", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["background, frog, croak", "female, flushes, toilet"], "captions_pred_video": ["a close up of a frog in the 
water", "footage is blurry and out of focus"], "captions_pred_audio": ["a frog is croaking", "a toilet flushes and a man speaks"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["wind noise makes sound into a microphone", "paper is crumpling consistently"], "sample_ids": ["w8uLijTqtlU", "v5cSxLaHADY"], "start_seconds": ["70", "0"], "properties": ["wind, microphone, noise", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage is blurry and shaky", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["the wind is blowing strongly", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "water rushes by"], "sample_ids": ["zdYdyF9-m8U", "x-PeY8Yb8M4"], "start_seconds": ["7", "300"], "properties": ["wind, crash, shoreline", "water, rushes, by"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["waves crash and wind blows ", "a car is driving on a wet road "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "a vehicle accelerates squealing tires"], "sample_ids": ["sOa7g-44Dag", "sd7xVssqlw"], "start_seconds": ["30", "50"], "properties": ["audio, scratching, man", "accelerates, tires, squealing"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", null], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "water flows as men speak and yell"], "sample_ids": ["wvKpEYswXO0", "vJ7JPEFhyLA"], "start_seconds": ["150", "16"], "properties": ["water, tap, run", 
"water, flow, men"], "captions_pred_video": ["of the person preparing food in the kitchen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has water flowing?", "label": 1}, {"captions": ["a horse runs while two women talk", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["sdvI1mHAsc", "w34HjHr6gAY"], "start_seconds": ["20", "30"], "properties": ["two women, horse, run", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a clock ticktocks in wind", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yVumC9TGknc", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["ticktocks, clock, wind", "loud, multiple, distance"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", null], "captions_pred_audio": ["a series of beeps and chirps", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "someone snores nearby"], "sample_ids": ["y8WEcpOlT3I", "spJCm8tD9Zo"], "start_seconds": ["40", "90"], 
"properties": ["wind, speak, buffeting", "someone snores, nearby, someone"], "captions_pred_video": ["on how to use a sewing machine youtube", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a person is snoring loudly"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a motor runs in the distance as a soft wind periodically gusts"], "sample_ids": ["un9VQlzgZM", "xyL9F5VrjkE"], "start_seconds": ["5", "20"], "properties": ["wind, speak, laugh", "wind, motor, distance"], "captions_pred_video": [null, "of a caterpillar truck loading logs into a trailer"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "the wind is blowing and a car is passing by "], "question": "which entity is about a motor running in the distance as a soft wind periodically gusts?", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "wind blows in gusts as a woman speaks in the distance"], "sample_ids": ["s4Uz1Ffgo04", "uC9dtII1KDI"], "start_seconds": ["100", "150"], "properties": ["water, rushes, vehicle", "wind, gusts, distance"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a person riding a horse in a riding arena"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a woman is speaking with wind noise and breathing in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a man speaks as a car is passing by"], "sample_ids": ["zFjIWfSD-4", "sK4u5T8hW78"], "start_seconds": ["410", "30"], "properties": ["People, motor, brakes", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "an infant crying as a woman laughs"], "sample_ids": ["uWAAAL4CIoc", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["a woman, chirps, animal", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a baby cries and a woman speaks"], "question": "which entity is more likely to be a solitary event", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "a car speeding up in the distance"], "sample_ids": ["tGcFnX0GHI", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["ring, talk, woman", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["humming and rattling of an engine idling as it revs", "a woman and man speak while food is frying"], "sample_ids": ["xMXvkIcaG0Y", "zk-xJGQU8-4"], "start_seconds": ["30", "130"], "properties": ["sound, humming, rattling", "food, man, woman"], "captions_pred_video": ["footage of a car's hood being opened up to reveal the engine underneath the hood", "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["an engine is revving and accelerating ", "a woman is speaking 
while dishes are clanging and music is playing in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "a woman speaks happily and an animal chirps"], "sample_ids": ["smDKStoHBJo", "uWAAAL4CIoc"], "start_seconds": ["0", "0"], "properties": ["a, infant, speaking", "a woman, chirps, animal"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and a dog is barking "], "question": "which entity is a woman speaking to an animal?", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "wind blows as people chatter quietly"], "sample_ids": ["sOa7g-44Dag", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["background, man, spray", "wind, chatter, people"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a clock alarm sounds and gears turn"], "sample_ids": ["xjvTpk2Zpr8", "w2M4i1mklOA"], "start_seconds": ["70", "30"], "properties": ["engine, run, wind", "alarm, gears, turn"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of an antique clock"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a clock is ticking and a bell is ringing "], "question": "which entity is not a clock?", "label": 0}, {"captions": ["a horse runs while two women talk", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sdvI1mHAsc", "y8WEcpOlT3I"], "start_seconds": ["20", "40"], "properties": ["two women, horse, run", 
"harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is more active", "label": 0}, {"captions": ["a person is whistling", "an infant crying frantically"], "sample_ids": ["sIXTftIuUgw", "zwOBqeFTgiU"], "start_seconds": ["90", "30"], "properties": ["person, whistling, person", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a person whistling a song", "a baby cries loudly"], "question": "which entity is a person", "label": 0}, {"captions": ["a man speaks followed by another man speaking outside", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["viuTg1M-dqg", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["two men, speak, follow", "motor noise, horn, siren"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn and a siren", "label": 1}, {"captions": ["a goat screams and people speak in the background", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["xC8kbrKJmco", "y2bVZ7rz-5M"], "start_seconds": ["0", "280"], "properties": ["background, goat, scream", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a goat is bleating ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a warning", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "winds blows roughly as a vehicle races past"], 
"sample_ids": ["su6FAOcOA8c", "xjvTpk2Zpr8"], "start_seconds": ["4", "70"], "properties": ["engine, run, woman", "wind, blows, vehicle"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "multiple ducks quack continuously"], "sample_ids": ["w5W5Kqtc8E", "wfHeoPDLMaM"], "start_seconds": ["100", "30"], "properties": ["wind, engine, scream", "multiple, quack, continuously"], "captions_pred_video": [null, "ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "ducks are quacking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a church bell rings several times", "a man talks while a clock does ticktock"], "sample_ids": ["sUVVjE3Ucp8", "spYNpeN7rPY"], "start_seconds": ["0", "1"], "properties": ["ring, bell, several", "a clock, ticktock, man"], "captions_pred_video": ["the 
video shows a stone wall with a clock on top of it and a bench in front of it", "in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis"], "captions_pred_audio": ["a church bell is ringing ", "a man is speaking and breathing with background noise "], "question": "which entity is a clock?", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "water is sprayed across a hard surface"], "sample_ids": ["w0xsN8X18Y", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["rain, thunder, surface", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "water splashes as an animal walks through"], "sample_ids": ["zF8yoL0rkbI", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": 
["engine, run, someone", "animal, water, splashes"], "captions_pred_video": ["footage of the traffic on the street at night", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "someone snores nearby"], "sample_ids": ["x6ijhqRY38s", "spJCm8tD9Zo"], "start_seconds": ["250", "90"], "properties": ["bowl, silverware, man", "someone snores, nearby, someone"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a male speaks over some small clicks", "winds blows roughly as a vehicle races past"], "sample_ids": ["uXxVebHsGZ8", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["male, clicks, speak", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["weDbePuc-Xc", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["music, slaps, human", "animal, grunts, snorts"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a woman is 
speaking and a baby is crying"], "question": "which entity has a woman speaking and an animal grunts and snorts?", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "a stream of water flows as people talk and wind blows"], "sample_ids": ["skd2PphS6oI", "xBxDz0CFVn0"], "start_seconds": ["190", "30"], "properties": ["ring, bird, vocalize", "stream, water, flow"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "footage is blurry and out of focus"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["wy1eKjR7KC0", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["people, talk, distance", "a woman, a television program, a bird"], "captions_pred_video": ["two police officers riding motorcycles down the street", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a woman is speaking and a dog is whimpering"], "question": "which entity is a solitary event", "label": 1}, {"captions": ["water splashes and a door squeaks", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["sdXV-ylviw", "xfaoyyzw2WU"], "start_seconds": ["190", "180"], "properties": ["sound, splash, door", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a dog barks and taps with background noise ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["an adult woman and an adult man speak", "an engine runs loudly"], "sample_ids": ["zTLVJCo4WEE", "vqZuVbG6-HI"], "start_seconds": 
["30", "130"], "properties": ["two people, adult, speak", "loud, engine, run"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["uqFtmnhuqA8", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["a, b, c", "motor noise, horn, siren"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "an airplane engine roars increasingly louder"], "sample_ids": ["sd7xVssqlw", "vBslzh7saPw"], "start_seconds": ["50", "90"], "properties": ["accelerates, tires, squealing", "engine, roar, louder"], "captions_pred_video": [null, "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a jet engine roars and accelerates "], "question": "which entity is louder", "label": 1}, {"captions": ["a small engine spits as it runs", "a train horn blows as it passes by"], "sample_ids": ["sZvwOuuPGP0", "zVacuqSb4LI"], "start_seconds": ["50", "30"], "properties": ["spits, engine, runs", "horn, blows, train"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong 
a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a medium engine is running ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which train is blowing a horn?", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["y2bVZ7rz-5M", "yajyRTUQk3U"], "start_seconds": ["280", "400"], "properties": ["motor noise, horn, siren", "a woman, something, fried"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["u7C-AEBQM", "wSVhSdj0F0"], "start_seconds": ["30", "10"], "properties": ["ticks, rhythmic, quiet", "horn honks, keys jingle, electronic beep"], "captions_pred_video": [null, null], "captions_pred_audio": ["a ticktock of a clock", "a car horn honks and keys jangle with background noise "], "question": "which entity is louder", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "some 
men talk among st themselves as cars speed and race loudly"], "sample_ids": ["y8dSeubCNI", "uZesmtKZGSw"], "start_seconds": ["4", "250"], "properties": ["engine revving, people speaking, motorcycle", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["an engine revving and people talking in the background", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["birds vocalize and a man speaks", "a person uses a saw to cut some wood"], "sample_ids": ["v0wPrLBI3hg", "sHbXC6na9hg"], "start_seconds": ["30", "0"], "properties": ["vocalize, bird, speak", "a person, saw, wood"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "an engine is idling and vibrating"], "question": "which entity is a person", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["x5cuQjOdM3E", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["cat, meows, young woman", "a, scream, girl"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman is 
speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "wind blows as people chatter quietly"], "sample_ids": ["uiItxDsDMFI", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["wood, piece, saw", "wind, chatter, people"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "footage is blurry and out of focus"], "captions_pred_audio": ["a saw is being used with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a car speeding up in the distance"], "sample_ids": ["x5cuQjOdM3E", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["cat, meows, young woman", "distance, car, speed"], "captions_pred_video": ["a black background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "a drill drills through something then people begin laughing"], "sample_ids": ["tjmoSi330GM", "tEE3MpBt1sg"], "start_seconds": ["23", "50"], "properties": ["speed, water, boat", "drill, something, laugh"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "people are laughing breathing and speaking with background noise "], "question": "which is a drill", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "multiple people speak and children yell while water gurgles"], "sample_ids": ["siJFXfGWgDk", "vb1fPSDI4c"], "start_seconds": ["50", "30"], "properties": ["a, bird, vehicle", "multiple, people, yell"], 
"captions_pred_video": ["footage of a beekeeper working with bees in a beehive", null], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a man is filing a hard object"], "sample_ids": ["v0x1odnXtP0", "vveS8HT7Uog"], "start_seconds": ["210", "100"], "properties": ["keyboard, type, computer", "a man, hard, object"], "captions_pred_video": ["how to make money on youtube in spanish", "footage is of a workbench with various tools on it including a hammer and a screwdriver"], "captions_pred_audio": ["a person is typing on a keyboard", "a man is filing and speaking with background noise and breathing "], "question": "which object is harder to file", "label": 0}, {"captions": ["an animal growls followed by birds chirping", "a small musical boom and then birds tweet and a few dogs pant"], "sample_ids": ["y2ZBGpgbhHM", "y2ZBGpgbhHM"], "start_seconds": ["30", "30"], "properties": ["animal, growl, bird", "birds, tweet, pant"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds chirping and a dog panting", "birds chirping and a dog panting"], "question": "which entity has more animals", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "water flows as a woman laughs and a man speaks"], "sample_ids": ["zkKdxzNC97Y", "vddP56-ogds"], "start_seconds": ["27", "30"], "properties": ["hard, surface, door", "water, flow, laugh"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "water is running and gurgling and a man is speaking"], "question": "which entity is a video", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "various birds chirp and squeal, and an animal grunts"], 
"sample_ids": ["wRBHTgrbiwg", "tDlysoZiA1I"], "start_seconds": ["50", "0"], "properties": ["birds, chirp, cooing", "animal, grunts, chirps"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "birds are chirping and a rooster is crowing "], "question": "which entity is a recording of birds chirping?", "label": 0}, {"captions": ["the rumbling of a bus followed by a soft male voice", "winds blows roughly as a vehicle races past"], "sample_ids": ["vK93VuO0yNc", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["male voice, bus, rumble", "wind, blows, vehicle"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "an infant crying as a woman laughs"], "sample_ids": ["wqZ135Ssz0", "xhmRY9yhC7c"], "start_seconds": ["60", "20"], "properties": ["man, woman, squawks", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a baby cries and a woman speaks"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["tapping occurs then a baby cries", "a motor noise is accompanied by a door opening and closing"], "sample_ids": ["wIJK3-5y0kA", "vBHyYJ8pL0"], "start_seconds": ["30", "2"], "properties": ["a, cry, baby", "noise, door, opening"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "mechanisms are ticking and a sliding 
door is opening and closing "], "question": "which entity is accompanied by a door opening and closing?", "label": 1}, {"captions": ["an adult woman and an adult man speak", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["zTLVJCo4WEE", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["two people, adult, speak", "People, motor, brakes"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has more people", "label": 1}, {"captions": ["an engine runs and a man speaks", "a toilet flushes and a female speaks"], "sample_ids": ["yT5WfYMRr-U", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["engine, run, man", "female, flushes, toilet"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a toilet flushes and a man speaks"], "question": "which entity is a man speaking?", "label": 0}, {"captions": ["multiple birds chirp and an animal grunts", "water pouring and bubbling"], "sample_ids": ["tDlysoZiA1I", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["animal, grunt, multiple", "water, bubbles, pouring"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do 
an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an airplane engine runs", "a stream of water runs briefly"], "sample_ids": ["yVPZ2MNWpms", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["engine, airplane, runs", "stream, water, run"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a car is driving by on the road ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a vehicle engine accelerating then running on idle"], "sample_ids": ["u2f5NpsoHBg", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["person, laugh, clap", "engine, accelerate, idle"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "an engine is idling"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["uqFtmnhuqA8", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["a, b, c", "airplane, boy, fly"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["mechanisms are ticking and a 
hammer is striking ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a toilet flushes and a female speaks"], "sample_ids": ["vXlk0lIQBFo", "yaln9y8I7ms"], "start_seconds": ["470", "230"], "properties": ["wind, talk, vocalize", "female, flushes, toilet"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage is blurry and out of focus"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a toilet flushes and a man speaks"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["goats bleat and people speak", "loud, continuous burping"], "sample_ids": ["z5iUE5h0EPs", "y636gklDioE"], "start_seconds": ["30", "20"], "properties": ["goats bleat, people speak, language", "loud, continuous, burping"], "captions_pred_video": ["of the goat in the barn", "a dog sitting on a red chair in front of an old telephone"], "captions_pred_audio": ["a goat bleats and a man speaks", "a person burps loudly several times"], "question": "which is louder", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "an engine runs loudly"], "sample_ids": ["yaln9y8I7ms", "vqZuVbG6-HI"], "start_seconds": ["230", "130"], "properties": ["female, flushes, toilet", "loud, engine, run"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vSeGhaZt-aI", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["water, bubbles, run", "a, scream, girl"], "captions_pred_video": ["a man in a kitchen preparing a 
smoothie with a blender", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a grown man speaking and water bubbles and runs?", "label": 0}, {"captions": ["a child speaks in closed space", "a female speaks softly as paper crinkles"], "sample_ids": ["yW6FWLSLkx4", "xvDdE3zNf8Y"], "start_seconds": ["40", "120"], "properties": ["child, space, speak", "a, female, speaks"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a woman speaks and crumples paper"], "question": "which entity is speaking softly", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a frog croaks as other frogs croak in the background"], "sample_ids": ["xKB8O8LTs6s", "yswmmRZFItk"], "start_seconds": ["70", "0"], "properties": ["music, gunshots, explosion", "background, frog, croak"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a close up of a frog in the water"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a frog is croaking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["children speak as a female ask them questions", "water flows and trickles"], "sample_ids": ["wEBlkGWVWwE", "tB7hWb9gTuQ"], "start_seconds": ["260", "30"], "properties": ["female, speak, questions", "water, flow, trickle"], "captions_pred_video": ["shows a person writing on the whiteboard", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "water is splashing and 
gurgling"], "question": "which entity is not a flow of water?", "label": 0}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a drill drills through something then people begin laughing"], "sample_ids": ["zkKdxzNC97Y", "tEE3MpBt1sg"], "start_seconds": ["27", "50"], "properties": ["loud, bang, noise", "drill, something, laugh"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a door is opened and closed", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["a child speaks", "a car speeding up in the distance"], "sample_ids": ["yW6FWLSLkx4", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["a, child, speaks", "distance, car, speed"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", null], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "vehicles pass by on a roadway"], "sample_ids": ["xKB8O8LTs6s", "tgbONvsP47Y"], "start_seconds": ["70", "0"], "properties": ["music, gunfire, explosion", "pass, vehicle, roadway"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of a fire truck entering a garage"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a car is driving on the road "], "question": "which entity is more calm", "label": 1}, {"captions": ["women speak and laugh as wind blows", "dishes cling together then a man begins to speak"], "sample_ids": ["un9VQlzgZM", "sQGXqGcwOTc"], "start_seconds": ["5", "3"], "properties": ["wind, speak, laugh", "cling, 
speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "an infant crying frantically"], "sample_ids": ["zj2R0XoFr5k", "zwOBqeFTgiU"], "start_seconds": ["50", "30"], "properties": ["airplane, boy, fly", "cry, infant, frantically"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "of the baby crying in the car seat"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a man speaks with another voice speaking in the background"], "sample_ids": ["xKB8O8LTs6s", "u21-Z5gJCB8"], "start_seconds": ["70", "30"], "properties": ["music, radio, gunshots", "background, voice, man"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a woman speaking on a radio?", "label": 0}, {"captions": ["a vehicle accelerates and squeals tires", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yRx9txMcBl0", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["accelerates, tires, squeals", "People, motor, brakes"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, 
gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["females talk and laugh over gusting wind", "a man speaks as a car is passing by"], "sample_ids": ["un9VQlzgZM", "sK4u5T8hW78"], "start_seconds": ["5", "30"], "properties": ["females, talk, laugh", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["small dogs yip and bark sharply", "paper is crumpling consistently"], "sample_ids": ["v-wcQf4BDY0", "v5cSxLaHADY"], "start_seconds": ["120", "0"], "properties": ["bark, yip, sharply", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a dog barks and 
growls", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "dishes cling together then a man begins to speak"], "sample_ids": ["zY3icUyMdh8", "sQGXqGcwOTc"], "start_seconds": ["20", "3"], "properties": ["dog, bark, engine", "cling, speak, dishes"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "mechanisms are operating and water is splashing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "dogs barking and whimpering"], "sample_ids": ["w5W5Kqtc8E", "tIY7qOV3rEM"], "start_seconds": ["100", "0"], "properties": ["wind, engine, scream", "barking, whimpering, dog"], "captions_pred_video": [null, "a dog is standing in the middle of a dirt road in the woods"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a dog is barking and a cat is meowing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a car accelerates and wind blows"], "sample_ids": ["yYEVLuqEytU", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["animal, pig, background", "accelerates, wind, blows"], "captions_pred_video": ["a baby goat is being petted by a person's hand", null], "captions_pred_audio": ["several sheep bleat and a man speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["continuous sneezing together with speech", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["x4dZyf9Gbj0", "yajyRTUQk3U"], "start_seconds": ["130", "400"], "properties": ["continuous, sneeze, speech", "a 
woman, something, fried"], "captions_pred_video": ["footage is blurry and out of focus", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman sneezes and speaks", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["sLUnaPT5gM8", "y2bVZ7rz-5M"], "start_seconds": ["0", "280"], "properties": ["loud, laughter, intermittent", "motor noise, horn, siren"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a warning", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a person snores loudly multiple times at a close distance"], "sample_ids": ["ukxt9I7eMMg", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["continuous, woman, speaking", "loud, multiple, distance"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["water splashes as an animal walks through", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["w1ir-sZ3Im8", "zj2R0XoFr5k"], "start_seconds": ["90", "50"], "properties": ["animal, water, splashes", "airplane, boy, fly"], "captions_pred_video": ["footage of a group of people riding horses through a river", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["water splashes 
and gurgles as people speak", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man speaks while water drains", "birds chirp and objects are moved around"], "sample_ids": ["vSeGhaZt-aI", "yPUYU6t3rwo"], "start_seconds": ["50", "370"], "properties": ["water, drain, man", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a baby cries and a woman moans", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["smDKStoHBJo", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["a, cry, woman", "a woman, something, fried"], "captions_pred_video": ["a man holding a crying baby in his arms", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking while food is frying in the background"], "question": "which entity is about a woman talking?", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "someone is burping continuously"], "sample_ids": ["w5W5Kqtc8E", "y636gklDioE"], "start_seconds": ["100", "20"], "properties": ["wind, engine, scream", "burps, burps, burps"], "captions_pred_video": [null, "a dog sitting on a red chair in front of an old telephone"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a person burps loudly several times"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["t25U-v4k4ts", "sLUnaPT5gM8"], "start_seconds": ["40", "0"], "properties": 
["bees buzz, birds chirp, man speaks", "loud, laughter, intermittent"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks as a machine runs", "a man speaks as a machine runs"], "sample_ids": ["vD6lYD1l0BY", "vD6lYD1l0BY"], "start_seconds": ["330", "330"], "properties": ["a, machine, run", "a, machine, run"], "captions_pred_video": ["game controller being held in the hands of the person", "game controller being held in the hands of the person"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking and dishes are being washed "], "question": "which machine is running in the first image?", "label": 0}, {"captions": ["children cheer as a man speaks then an audience screams", "someone snores nearby"], "sample_ids": ["vJvryTwuAV8", "spJCm8tD9Zo"], "start_seconds": ["16", "90"], "properties": ["audience, cheer, man", "someone snores, nearby, someone"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["an adult woman and an adult man speak", "people cheer as a vehicle engine revs"], "sample_ids": ["zTLVJCo4WEE", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["two people, adult, speak", "engine revs, vehicle, people"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a truck 
is revving its engine and a man is speaking "], "question": "which entity shows people cheering?", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yRx9txMcBl0", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["accelerates, tires, squeals", "People, motor, brakes"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["people speak then an engine runs", "wind blows as people chatter quietly"], "sample_ids": ["uMTTDZ2mb4", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["engine, run, people", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["tapping occurs then a baby cries", "wind blowing followed by a zoom"], "sample_ids": ["wIJK3-5y0kA", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["a, cry, baby", "wind, blow, zoom"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "is taken from a 
motorcycle's point of view"], "captions_pred_audio": ["a baby cries and a woman speaks", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a zoom?", "label": 0}, {"captions": ["females talk and laugh over gusting wind", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["un9VQlzgZM", "vbZ-0lGPneg"], "start_seconds": ["5", "30"], "properties": ["females, talk, laugh", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a woman is speaking and a dog is whimpering"], "question": "which entity is more likely to be a documentary", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wqADXCzngMw", "tdWhHV3X25Q"], "start_seconds": ["340", "60"], "properties": ["engine, idle, man", "applause, audience, yells"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["someone whistles a tune", "a child speaks in closed space"], "sample_ids": ["sIXTftIuUgw", "yW6FWLSLkx4"], "start_seconds": ["90", "40"], "properties": ["someone, tune, whistle", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person whistling a song", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", 
"rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["xyL9F5VrjkE", "vfYTJq7nU"], "start_seconds": ["20", "130"], "properties": ["wind, motor, distance", "rustling, ducks, quack"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a duck quacks and a woman speaks"], "question": "which entity is about a duck?", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a jet engine spools up and takes off"], "sample_ids": ["tEE3MpBt1sg", "vBslzh7saPw"], "start_seconds": ["50", "90"], "properties": ["drill, something, laugh", "engine, spools, takes"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a jet engine roars and accelerates "], "question": "which entity is a machine?", "label": 0}, {"captions": ["a cat meows as a young woman speaks", "paper is crumpling consistently"], "sample_ids": ["x5cuQjOdM3E", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["cat, meows, young woman", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a cat meows and a woman speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "a consistent ticking pattern"], "sample_ids": ["vz8868znkVQ", "sCeWURVHfOM"], "start_seconds": ["60", "30"], "properties": ["audio, click, kid speaking", "ticking, pattern, clock"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", "- a 
close-up view of the clock's inner workings"], "captions_pred_audio": ["a baby is laughing and breathing with background noise ", "ticking of a clock"], "question": "which entity is a clock?", "label": 1}, {"captions": ["a person sniffs and sneezes", "a train horn blows as it passes by"], "sample_ids": ["uRlbY6aoBU", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["sneezes, person, sniffs", "horn, blows, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a man is sneezing ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "someone is typing on a computer keyboard"], "sample_ids": ["w-4gHptFNuU", "v0x1odnXtP0"], "start_seconds": ["21", "210"], "properties": ["engine revs, accelerates, bump", "keyboard, type, computer"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "how to make money on youtube in spanish"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a person is typing on a keyboard"], "question": "which entity is stationary", "label": 1}, {"captions": ["water pouring and bubbling", "an airplane 
engine spools and people speak"], "sample_ids": ["uyRfq-jKPpo", "wTjoRj1se3U"], "start_seconds": ["50", "390"], "properties": ["water, bubbles, pouring", "airplane, engine, spool"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["water is running from a faucet", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["continuous sneezing together with speech", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["x4dZyf9Gbj0", "uEU-Hg5MTN8"], "start_seconds": ["130", "27"], "properties": ["continuous, sneeze, speech", "a woman, laughs, animal"], "captions_pred_video": ["footage is blurry and out of focus", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman sneezes and speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "a drill drills through something then people begin laughing"], "sample_ids": ["wSVhSdj0F0", "tEE3MpBt1sg"], "start_seconds": ["10", "50"], "properties": ["horn honks, keys jingle, electronic beep", 
"drill, something, laugh"], "captions_pred_video": [null, "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "a vehicle is skidding and squealing tires"], "sample_ids": ["tZGN5a7ybxo", "soTOh3zYJfY"], "start_seconds": ["60", "40"], "properties": ["ring, train, horn", "vehicle, skid, tires"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "a red car drifting on a winding road with smoke coming out of it"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["people speak and tapping occurs", "winds blows roughly as a vehicle races past"], "sample_ids": ["tFCUUGdREgA", "xjvTpk2Zpr8"], "start_seconds": ["70", "70"], "properties": ["people, tap, speak", "wind, blows, vehicle"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["a beep occurs briefly", "music plays and a woman speaks on a radio before gunshots are fired"], "sample_ids": ["xtWeJ56-U-g", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["beep, occur, briefly", "music, radio, gunshots"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 
79 80 81 82 8", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has a woman speaking on a radio?", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "a woman speaks and dog vocalizes"], "sample_ids": ["voJh2gJxXhA", "uWAAAL4CIoc"], "start_seconds": ["50", "0"], "properties": ["music, frog, croak", "a, dog, vocalize"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", null], "captions_pred_audio": ["music is playing and crickets are chirping ", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "someone whistles a tune"], "sample_ids": ["tw76HGONaKg", "sIXTftIuUgw"], "start_seconds": ["570", "90"], "properties": ["music, click, man", "someone, tune, whistle"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a person 
whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a horse runs while two women talk", "an infant crying as a woman laughs"], "sample_ids": ["sdvI1mHAsc", "xhmRY9yhC7c"], "start_seconds": ["20", "20"], "properties": ["two women, horse, run", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "a child speaks"], "sample_ids": ["soTOh3zYJfY", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["vehicle, skid, tires", "a, child, speaks"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wyllXV6PjKo", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["a baby, a woman, a man", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman is speaking and a baby is crying"], "question": "which entity has a baby?", "label": 0}, {"captions": ["children speak as a female ask them questions", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["wEBlkGWVWwE", "y8WEcpOlT3I"], "start_seconds": ["260", "40"], "properties": ["female, speak, questions", "harsh, wind, blows"], "captions_pred_video": ["shows a person writing on the whiteboard", "on how to use 
a sewing machine youtube"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking with wind noise in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a toilet flushes and water drains", "water flows as men speak and yell"], "sample_ids": ["sfAvvZwdLCY", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["water drains, flushes, water", "water, flow, men"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more water", "label": 1}, {"captions": ["a baby laugh at a sputter", "pigeons vocalize and birds chirp"], "sample_ids": ["sLUnaPT5gM8", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["laugh, sputter, baby", "vocalize, bird, chirp"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "of the pigeon in the cage"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "wind blows as people chatter quietly"], "sample_ids": ["y2bVZ7rz-5M", "xBxDz0CFVn0"], "start_seconds": ["280", "30"], "properties": ["motor noise, horn, siren", "wind, chatter, people"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage is blurry and out of focus"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "a weapon fires multiple times"], "sample_ids": ["vSeGhaZt-aI", 
"sMC07Ucy7kg"], "start_seconds": ["50", "10"], "properties": ["water, bubbles, run", "weapon, fire, multiple"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage is from a car's point of view"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "people are clapping and speaking with background noise "], "question": "which entity is more violent", "label": 1}, {"captions": ["leaves rustle while man speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["zOZleIRqZm4", "xjhAnI2q6hM"], "start_seconds": ["80", "6"], "properties": ["leaves, rustle, speak", "engine revs, vehicle, people"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a truck is revving its engine and a man is speaking "], "question": "which entity is more likely to be in a vehicle?", "label": 1}, {"captions": ["children speak and play together", "water splashes and a door squeaks"], "sample_ids": ["yVVP8XvWJTo", "sdXV-ylviw"], "start_seconds": ["260", "190"], "properties": ["children, speak, play", "sound, splash, door"], "captions_pred_video": ["footage of a playground at a school or daycare center", null], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a dog barks and taps with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "an airplane engine runs"], "sample_ids": ["vbZ-0lGPneg", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["a woman, a television program, a bird", "engine, airplane, runs"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": 
["a woman is speaking and a dog is whimpering", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a door slams shut roughly", "a vehicle engine accelerating then running on idle"], "sample_ids": ["zkKdxzNC97Y", "vYkA3cfXp5Q"], "start_seconds": ["27", "30"], "properties": ["a door, slams, shut", "engine, accelerate, idle"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a door is opened and closed", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tPJvjq9QePY", "zl9Dqx-j7q4"], "start_seconds": ["40", "6"], "properties": ["animal, bleat, moo", "engine, laugh, loud"], "captions_pred_video": ["a dog and a sheep in a barn", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a baby cries and a man speaks", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a person is burping while a girl speaks", "a man speaks while turning a water faucet on"], "sample_ids": ["vdoxuJn9lTc", "vf9xf3vMsGM"], "start_seconds": ["40", "540"], "properties": ["person, burp, girl", "A man speaks while turning a water faucet on."], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "of the person washing their hands under the faucet"], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking while water is running in the background"], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "water pouring and bubbling"], "sample_ids": ["tEE3MpBt1sg", "uyRfq-jKPpo"], "start_seconds": ["50", "50"], "properties": ["drill, something, laugh", "water, bubbles, 
pouring"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["male speech with light ticking", "winds blows roughly as a vehicle races past"], "sample_ids": ["xO-Q2BlIIPU", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["male, speech, ticking", "wind, blows, vehicle"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "some tunes played by whistling"], "sample_ids": ["xERFUeZONz8", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["ring, approach, traffic", "tune, play, whistling"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "a young boy standing in front of a group of kids in a classroom"], 
"captions_pred_audio": ["an emergency vehicle siren blares", "a person whistling a song"], "question": "which entity is played by whistling", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["xjhAnI2q6hM", "uZesmtKZGSw"], "start_seconds": ["6", "250"], "properties": ["wind, blow, loudly", "men, talk, cars"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "plastic is tapped on while someone speaks"], "sample_ids": ["ylpYOorfH4o", "wvKpEYswXO0"], "start_seconds": ["410", "150"], "properties": ["motor, run, steady", "plastic, tap, speak"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to 
replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "small dogs yip and bark sharply"], "sample_ids": ["v7jJS8aAyA", "v-wcQf4BDY0"], "start_seconds": ["10", "120"], "properties": ["wind, blows, loudly", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as horns blow", "a telephone rings followed by a woman talking"], "sample_ids": ["tHyNqRyK34A", "tGcFnX0GHI"], "start_seconds": ["24", "0"], "properties": ["a, man, speaks", "ring, talk, woman"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", null], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["xl2PIWyXaM", "ziUT9IFTkjg"], "start_seconds": ["160", "10"], "properties": ["chirp, man, younger person", "background, birds, rustling"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and people are talking", "birds are chirping and a chime is ringing "], "question": "which 
entity has a man speaking?", "label": 0}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "someone is typing on a computer keyboard"], "sample_ids": ["wz7N8YRy74I", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["rooster, crow, background, men", "keyboard, type, computer"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a man speaks as a motor runs in the background"], "sample_ids": ["vh30P49Po6s", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["loud, continuous, quacks", "background, motor, run"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a duck is quacking loudly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is quieter", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "a woman speaks with water running"], "sample_ids": ["sjlVMgdGSK0", "wTideSjRFS0"], "start_seconds": ["30", "30"], "properties": ["accelerates, vehicle, race car", "water, running, woman"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking while water is running in the background"], "question": "which entity is stationary", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a mechanical buzzing getting louder"], "sample_ids": ["wz7N8YRy74I", 
"sEprKHm8Sj8"], "start_seconds": ["30", "90"], "properties": ["rooster, crow, background, people", "noise, loud, buzzing"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a race car accelerates and revs its engine "], "question": "which entity is a noise", "label": 1}, {"captions": ["a male speaks over some small clicks", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["uXxVebHsGZ8", "s7knHCFW82w"], "start_seconds": ["30", "30"], "properties": ["male, clicks, speak", "blow horn, get close, train"], "captions_pred_video": [null, "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a train is blowing its horn and its wheels are squealing "], "question": "which entity is moving", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "someone snores nearby"], "sample_ids": ["yks4cLgIDMc", "spJCm8tD9Zo"], "start_seconds": ["170", "90"], "properties": ["background, speaking, child", "someone snores, nearby, someone"], "captions_pred_video": ["footage of two kids wrestling on the floor", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking and a child is crying", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "wind blows as people chatter quietly"], "sample_ids": ["y8WEcpOlT3I", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": 
["harsh, wind, blows", "wind, chatter, people"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "people applaud and hoot and chat quietly"], "sample_ids": ["x6ijhqRY38s", "wwyfGO2J4"], "start_seconds": ["250", "90"], "properties": ["bowl, silverware, man", "people, applaud, hoot"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wTjoRj1se3U", "yajyRTUQk3U"], "start_seconds": ["390", "400"], "properties": ["engine, run, people", "a woman, something, fried"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a jet engine is running and people are talking", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "vehicles pass by on a roadway"], "sample_ids": ["uJV8NDaHqqk", "tgbONvsP47Y"], "start_seconds": ["100", "0"], "properties": ["loud, fly, chirp", "pass, vehicle, roadway"], "captions_pred_video": ["a bee hive in a wooden box", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a swarm of bees buzzing around", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a 
person whistles a meandering tune", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["uFoga8sHpiw", "w5W5Kqtc8E"], "start_seconds": ["90", "100"], "properties": ["person, tune, whistle", "wind, blow, vehicle"], "captions_pred_video": ["footage of a bird in a cage", null], "captions_pred_audio": ["a person whistles a song", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["water splashes and a door squeaks", "a woman speaks happily and an animal chirps"], "sample_ids": ["sdXV-ylviw", "uWAAAL4CIoc"], "start_seconds": ["190", "0"], "properties": ["sound, splash, door", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dog barks and taps with background noise ", "a woman is speaking and a dog is barking "], "question": "which entity has a higher pitch", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "an airplane engine spools and people speak"], "sample_ids": ["su6FAOcOA8c", "wTjoRj1se3U"], "start_seconds": ["4", "390"], "properties": ["engine, idle, woman", "airplane, engine, spool"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a jet engine is running and people are talking"], "question": "which entity is a vehicle", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a woman speaks as she rubs two objects together"], "sample_ids": ["uzQnlJXBbOM", "vzxHnu-SFEw"], "start_seconds": ["50", "80"], "properties": ["ringing, beep, stop", "two objects, woman, speak"], "captions_pred_video": ["footage of a person using a cell phone on a table", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how 
to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a telephone rings and a man speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vdoxuJn9lTc", "zl9Dqx-j7q4"], "start_seconds": ["40", "6"], "properties": ["burp, loud, girl", "engine, laugh, loud"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a child speaks followed by a burp", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["water flows as men speak and yell", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vJ7JPEFhyLA", "xfaoyyzw2WU"], "start_seconds": ["16", "180"], "properties": ["water, flow, men", "loud, jet engine, roar"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking while the wind is blowing and 
water is splashing", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "water is sprayed across a hard surface"], "sample_ids": ["xl2PIWyXaM", "sQwlkXjQabo"], "start_seconds": ["160", "10"], "properties": ["chirp, man, younger person", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["birds are chirping and people are talking", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "a car speeding up in the distance"], "sample_ids": ["sapQIQUhFc", "u0TrcHhkPQ"], "start_seconds": ["280", "20"], "properties": ["liquid, flow, distance", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a person sniffs and sneezes"], "sample_ids": ["vBHyYJ8pL0", "uRlbY6aoBU"], "start_seconds": ["2", "0"], "properties": ["noise, door, opening", "sneezes, person, sniffs"], "captions_pred_video": [null, null], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a man is sneezing "], "question": "which entity is accompanied by a door opening and closing", "label": 0}, {"captions": ["someone sprays liquid onto a hard surface", "water is sprayed across a hard surface"], "sample_ids": ["sQwlkXjQabo", "sQwlkXjQabo"], "start_seconds": ["10", "10"], "properties": ["liquid, surface, spray", "water, spray, surface"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a close-up of a red car with water droplets on the 
hood"], "captions_pred_audio": ["spraying followed by silence", "spraying followed by silence"], "question": "which entity is sprayed across a hard surface", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a bird chirps in response to a woman chirping for the birds"], "sample_ids": ["wnpJndXuxLc", "uOpoD0gGXcs"], "start_seconds": ["50", "120"], "properties": ["blows, vehicle, train", "chirps, woman, bird"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a herd of cows grazing in the field"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "birds are chirping and a man is speaking"], "question": "which entity is a person", "label": 1}, {"captions": ["material crumbles into a microphone", "water splashes as an animal walks through"], "sample_ids": ["vofpvUo6NAw", "w1ir-sZ3Im8"], "start_seconds": ["220", "90"], "properties": ["material, crumbles, microphone", "animal, water, splashes"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["paper is being crumpled and crinkled", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be a video of a person speaking?", "label": 0}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tQWGZLItBXk", "tdWhHV3X25Q"], "start_seconds": ["170", "60"], "properties": ["music, person, ding", "applause, audience, yells"], "captions_pred_video": ["worms revolution screenshots", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking and a crowd is clapping"], "question": "which entity has more people", "label": 
1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tZGN5a7ybxo", "vYkA3cfXp5Q"], "start_seconds": ["60", "30"], "properties": ["ring, train, horn", "engine, accelerate, idle"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a train is moving and blowing its horn ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a train engine runs and a horn blows", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zPX9o1uDiI", "wqZ135Ssz0"], "start_seconds": ["40", "60"], "properties": ["engine, horn, run", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a human activity", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "two frogs croak at each other"], "sample_ids": ["un9VQlzgZM", "zg0X6BnhOLQ"], "start_seconds": ["5", "410"], "properties": ["females, talk, laugh", "two frogs, croak, at each other"], "captions_pred_video": [null, "footage of lightning in the sky at night"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a frog is croaking"], "question": "which entity is more likely to be a frog", "label": 1}, {"captions": ["scraping and female speech with distant music", "someone whistles a tune"], "sample_ids": ["yHeVV-xeOxQ", "sIXTftIuUgw"], "start_seconds": ["130", "90"], "properties": ["female, speech, music", "someone, tune, whistle"], "captions_pred_video": ["of a girl milking a goat's udder", null], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a person 
whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "wind blows as people chatter quietly"], "sample_ids": ["y8dSeubCNI", "xBxDz0CFVn0"], "start_seconds": ["4", "30"], "properties": ["men, women, car", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["an engine revving and people talking in the background", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "people speak in a closed space"], "sample_ids": ["s6DESzUTGjY", "sTpirNYo8vQ"], "start_seconds": ["16", "30"], "properties": ["wind, laugh, woman", "people, space, speak"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "of a man taking a selfie on a bus"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a man is speaking while a car is revving and accelerating "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a vehicle engine revs as the 
vehicle passes"], "sample_ids": ["sK4u5T8hW78", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["a, car, pass", "engine, revs, vehicle"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a race car accelerates and revs its engine "], "question": "which vehicle is passing by", "label": 1}, {"captions": ["white noise and birds chirping", "roadway noise occurs and a truck accelerates"], "sample_ids": ["wRBHTgrbiwg", "tgbONvsP47Y"], "start_seconds": ["50", "0"], "properties": ["noise, white, chirping", "noise, truck, accelerate"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a car is driving on the road "], "question": "which noise is more likely to be heard", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xzKKf9bKNUo", "zl9Dqx-j7q4"], "start_seconds": ["10", "6"], "properties": ["background, noise, snoring", "engine, laugh, loud"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a person snoring loudly", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["multiple people speak and children yell while 
water gurgles", "small dogs yip and bark sharply"], "sample_ids": ["vb1fPSDI4c", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["multiple, people, yell", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["food is frying then a woman speaks", "a man speaks followed by another man speaking outside"], "sample_ids": ["ukxt9I7eMMg", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["food, woman, speak", "two men, speak, follow"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a person sniffles and then sneezes in the distance", "a frog croaks as other frogs croak in the background"], "sample_ids": ["uRlbY6aoBU", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["a, distance, sneeze", "background, frog, croak"], "captions_pred_video": [null, "a close up of a frog in the water"], "captions_pred_audio": ["a man is sneezing ", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "winds blows roughly as a vehicle races past"], "sample_ids": ["sfAvvZwdLCY", "xjvTpk2Zpr8"], "start_seconds": ["20", "70"], "properties": ["flushes, drains, water", "wind, blows, vehicle"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a toilet is flushed", "a jet engine roars and wind blows "], "question": "which 
entity is a source of water", "label": 0}, {"captions": ["a door opens and closes", "birds chirp and objects are moved around"], "sample_ids": ["vBHyYJ8pL0", "yPUYU6t3rwo"], "start_seconds": ["2", "370"], "properties": ["open, close, door", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["uEU-Hg5MTN8", "uYT5gxnyMWM"], "start_seconds": ["27", "50"], "properties": ["a woman, laughs, animal", "a, scream, girl"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "several insects fly while two men talk"], "sample_ids": ["y4tPJXBKDig", "s-T9OVOiMLo"], "start_seconds": ["20", "330"], "properties": ["a, noise, talk", "several, fly, men"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video of a girl talking", "label": 0}, {"captions": ["an engine idles consistently before sputtering some", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["rwTERCUno", "tdWhHV3X25Q"], "start_seconds": ["90", "60"], "properties": ["engine, 
idle, sputter", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "an airplane engine runs"], "sample_ids": ["vbpKkWvfOu4", "yVPZ2MNWpms"], "start_seconds": ["560", "0"], "properties": ["a, woman, man", "engine, airplane, runs"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "paper is crumpling consistently"], "sample_ids": ["xl2PIWyXaM", "v5cSxLaHADY"], "start_seconds": ["160", "0"], "properties": ["chirp, man, younger person", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["birds are chirping and people are talking", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a machine beeps continuously"], "sample_ids": ["wyllXV6PjKo", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["a kid, talk, cry", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman speaks and a baby cries", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, 
{"captions": ["a man makes an exclamation, then another man speaks", "a duck quacks continuously"], "sample_ids": ["xO-Q2BlIIPU", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["two men, exclamation, speak", "quacks, continuously, duck"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["an animal quacks rapidly", "an infant crying as a woman laughs"], "sample_ids": ["vh30P49Po6s", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["animal, quacks, rapidly", "a, laugh, infant"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a duck is quacking loudly", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["u21-Z5gJCB8", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["background, voice, man", "two men, woman, birds"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "a clock ticktocks"], "sample_ids": ["sDSppXIlJrs", "v-g-j2uTByM"], "start_seconds": ["27", "30"], "properties": ["microphone, water, wind", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "in your 
own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["food is frying and sizzles", "dishes cling together then a man begins to speak"], "sample_ids": ["zNRChLjqcU", "sQGXqGcwOTc"], "start_seconds": ["220", "3"], "properties": ["food is frying, sizzles, food", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["water is running from a faucet into a sink", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "a woman speaks as she rubs two objects together"], "sample_ids": ["t25U-v4k4ts", "vzxHnu-SFEw"], "start_seconds": ["40", "80"], "properties": ["bees buzz, birds chirp, man speaks", "two objects, woman, speak"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and bees 
are buzzing", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "an animal growls followed by birds chirping"], "sample_ids": ["w9lpbUn0hPc", "y2ZBGpgbhHM"], "start_seconds": ["30", "30"], "properties": ["male, wind, rustling", "animal, growl, bird"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", null], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "birds chirping and a dog panting"], "question": "which entity is followed by birds chirping", "label": 1}, {"captions": ["water splashes as an animal walks through", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["w1ir-sZ3Im8", "yDoT73BWsdA"], "start_seconds": ["90", "10"], "properties": ["animal, water, splashes", "engine, revs, vehicle"], "captions_pred_video": ["footage of a group of people riding horses through a river", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["water splashes and gurgles as people speak", "a race car accelerates and revs its engine "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["an audience gives applause as a man yells and a group sings", "a woman speaks and then a man speaks"], "sample_ids": ["tdWhHV3X25Q", "vbpKkWvfOu4"], "start_seconds": ["60", "560"], "properties": ["applause, audience, yells", "a, man, speaks"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a man is speaking and a crowd is 
clapping", "a woman is speaking and a man is speaking"], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "birds vocalize and chirp continuously"], "sample_ids": ["uOpoD0gGXcs", "w1mlz3Pe4fU"], "start_seconds": ["120", "300"], "properties": ["chirps, woman, bird", "vocalize, chirp, continuously"], "captions_pred_video": ["a herd of cows grazing in the field", "of a bird in a cage"], "captions_pred_audio": ["birds are chirping and a man is speaking", "birds are chirping and singing"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks while water drains", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["vSeGhaZt-aI", "w34HjHr6gAY"], "start_seconds": ["50", "30"], "properties": ["water, drain, man", "beeps, hit, woman"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["w2M4i1mklOA", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["loud, chime, bell", "motor noise, horn, siren"], "captions_pred_video": ["footage of an antique clock", "footage of a 
parade of fire trucks driving down the street"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vXlk0lIQBFo", "zj2R0XoFr5k"], "start_seconds": ["470", "50"], "properties": ["wind, talk, vocalize", "airplane, boy, fly"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a person snoring several times", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["spJCm8tD9Zo", "tDVADusiIoc"], "start_seconds": ["90", "60"], "properties": ["snore, person, several", "water, radio, man"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a person?", "label": 0}, {"captions": ["birds chirp quietly and an adult man speaks", "a car speeding up in the distance"], "sample_ids": ["zuua6-5goWw", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["birds, chirp, quiet, man, speaks", "distance, car, speed"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", null], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["engines sputter roughly and tires squeal", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zhx6hoYrHeI", "uYT5gxnyMWM"], "start_seconds": ["160", "50"], "properties": ["engine, sputter, rough", "female, spraying, scream"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "an engine idles consistently before sputtering some"], "sample_ids": ["tDlfY3nmx1A", "rwTERCUno"], "start_seconds": ["160", "90"], "properties": ["applause, laugh, man", "engine, idle, sputter"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", null], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "an engine is idling and vibrating"], "question": "which entity is a machine?", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "a car accelerates and wind blows"], "sample_ids": ["vBslzh7saPw", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["engine, roar, louder", "accelerates, wind, blows"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], 
"captions_pred_audio": ["a jet engine roars and accelerates ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["water runs into a sink while men speak", "a airplane flies overhead as a woman speaks"], "sample_ids": ["vzceMbklWc", "zj2R0XoFr5k"], "start_seconds": ["180", "50"], "properties": ["water, sink, run", "airplane, fly, woman"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["water is running and a man is speaking", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "a woman speaks as frying food sizzles"], "sample_ids": ["wqN6IIHw3po", "wTideSjRFS0"], "start_seconds": ["30", "30"], "properties": ["rain, surface, fall", "food, sizzle, woman"], "captions_pred_video": ["in your own words what is happening in this screenshot? 
blood splattered all over the place", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a man is speaking and water is splashing", "a woman is speaking while water is running in the background"], "question": "which entity is a man speaking to?", "label": 0}, {"captions": ["a vehicle accelerates squealing tires", "a stream of water runs briefly"], "sample_ids": ["sd7xVssqlw", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["accelerates, tires, squealing", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "people speak as gunfire rings out"], "sample_ids": ["sAam2NqGhLY", "wqTCwqVRDlk"], "start_seconds": ["20", "80"], "properties": ["snoring, breathing, child", "gunfire, ring, speak"], "captions_pred_video": ["of a little girl sleeping on a couch", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a person is snoring", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["su6FAOcOA8c", "vbZ-0lGPneg"], "start_seconds": ["4", "30"], "properties": ["engine, idle, woman", "a woman, a television program, a bird"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["metal clacking as food and oil sizzles followed 
by a woman talking", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vW4x7S1VfQc", "vJ7JPEFhyLA"], "start_seconds": ["150", "16"], "properties": ["clacking, oil, woman", "three men, wind, flow"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["food sizzles in a frying pan", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman talking?", "label": 0}, {"captions": ["a man speaks as a machine runs", "an engine sputters followed by a car zooming by"], "sample_ids": ["vD6lYD1l0BY", "u5RmF3c3Aw"], "start_seconds": ["330", "60"], "properties": ["a, machine, run", "engine, car, zoom"], "captions_pred_video": ["game controller being held in the hands of the person", null], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a race car accelerates and skids with wind noise in the background "], "question": "which entity is a car?", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["xfaoyyzw2WU", "vbZ-0lGPneg"], "start_seconds": ["180", "30"], "properties": ["loud, jet engine, roar", "a woman, a television program, a bird"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a woman is speaking and a dog is whimpering"], "question": "which is quieter", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "a woman speaks happily and an animal chirps"], "sample_ids": ["slZLHwNbbt4", "uWAAAL4CIoc"], "start_seconds": ["300", "0"], "properties": ["clap, distance, horn", "a woman, chirps, animal"], "captions_pred_video": 
["footage of a train coming down the tracks on a sunny day", null], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "an adult man speaks over glass clinking"], "sample_ids": ["yYEVLuqEytU", "u6jIvCtKarQ"], "start_seconds": ["40", "70"], "properties": ["grunt, slurp, background", "a, man, speaks"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage of a person using a blender on a stove top"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a man is speaking and dishes are being moved with background noise "], "question": "which entity is a person", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a woman speaks as she rubs two objects together"], "sample_ids": ["xKB8O8LTs6s", "vzxHnu-SFEw"], "start_seconds": ["70", "80"], "properties": ["music, radio, gunshots", "two objects, woman, speak"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which woman is speaking", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "rustling with distant murmuring"], "sample_ids": ["vh30P49Po6s", "wnNNcxAPwGQ"], "start_seconds": ["30", "0"], "properties": ["loud, continuous, quacks", "sound, distance, rustling"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a yellow truck doing a burnout on a race track"], "captions_pred_audio": ["a duck is quacking loudly", "a crowd of people are talking and laughing while a skateboard rolls by "], "question": "which entity is quieter", "label": 1}, {"captions": ["a weapon fires multiple times", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["sMC07Ucy7kg", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["weapon, fire, multiple", "animal, grunts, snorts"], "captions_pred_video": ["footage is from a car's point of view", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is not a weapon", "label": 1}, {"captions": ["an animal quacks rapidly", "someone snores nearby"], "sample_ids": ["vh30P49Po6s", "spJCm8tD9Zo"], "start_seconds": ["30", "90"], "properties": ["animal, quacks, rapidly", "someone snores, nearby, someone"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a duck is quacking loudly", "a person is snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a music is played followed by a frog 
croaking and then music is played again", "a stream of water runs briefly"], "sample_ids": ["voJh2gJxXhA", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["music, frog, croak", "stream, water, run"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["uWPRNLnpy7Y", "yDoT73BWsdA"], "start_seconds": ["10", "10"], "properties": ["accelerate, laugh, vehicle", "engine, revs, vehicle"], "captions_pred_video": ["is taken from a car driving down the street", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a race car accelerates and revs its engine "], "question": "which vehicle is moving faster", "label": 0}, {"captions": ["scraping and female speech with distant music", "a telephone rings followed by a woman talking"], "sample_ids": ["yHeVV-xeOxQ", "tGcFnX0GHI"], "start_seconds": ["130", "0"], "properties": ["female, speech, music", "ring, talk, woman"], "captions_pred_video": ["of a girl milking a goat's udder", null], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a dark barks and whimpers", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sYj4hpDUZDQ", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["barks, whimpers, dark", "two men, woman, birds"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", null], "captions_pred_audio": 
["a dog barks and a cat meows", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["people speak softly as food sizzles", "frogs croak and vocalize"], "sample_ids": ["yhQ2Lg-7qDY", "yswmmRZFItk"], "start_seconds": ["130", "0"], "properties": ["food, sizzle, speak", "croak, vocalize, frog"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a close up of a frog in the water"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a frog is croaking"], "question": "which entity is a frog", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "continuous sizzling with a woman speaking towards the end"], "sample_ids": ["vfYTJq7nU", "ukxt9I7eMMg"], "start_seconds": ["130", "30"], "properties": ["rustling, ducks, quack", "continuous, woman, speaking"], "captions_pred_video": [null, "footage of a person preparing food on a stool in a kitchen"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a woman is speaking while food is frying in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vVhthZ45k3Y", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["cat, purr, hiss", "a woman, something, fried"], "captions_pred_video": ["footage is blurry and out of focus", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "paper folding and crinkling"], "sample_ids": ["tIY7qOV3rEM", "zPpG3RD8lSs"], 
"start_seconds": ["0", "20"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "paper, fold, crinkle"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "the wind blows and a mouse clicks "], "question": "which entity is not a living thing", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "wind blows as people chatter quietly"], "sample_ids": ["xfudFO976zE", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["animal, bleats, cry", "wind, chatter, people"], "captions_pred_video": ["footage is blurry and shaky", "footage is blurry and out of focus"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a person speaks briefly", "a jet engine spools up and takes off"], "sample_ids": ["zOZleIRqZm4", "vBslzh7saPw"], "start_seconds": ["80", "90"], "properties": ["person, talk, brief", "engine, spools, takes"], "captions_pred_video": ["a person picking 
berries from the bushes in the garden", "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a jet engine roars and accelerates "], "question": "which is a moving object", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["yDoT73BWsdA", "xfaoyyzw2WU"], "start_seconds": ["10", "180"], "properties": ["engine, revs, vehicle", "loud, jet engine, roar"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "an aircraft engine roars and a man speaks "], "question": "which engine is louder", "label": 1}, {"captions": ["water bubbles and gurgles.", "birds chirp and objects are moved around"], "sample_ids": ["tB7hWb9gTuQ", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["bubbles, gurgles, water", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["water is splashing and gurgling", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "a woman speaks happily and an animal chirps"], "sample_ids": ["yFB25fqfU8I", "uWAAAL4CIoc"], "start_seconds": ["300", "0"], "properties": ["wave, crash, shoreline", "a woman, chirps, animal"], "captions_pred_video": ["footage of a person surfing in the ocean", null], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a woman is speaking and a dog is barking "], "question": "which entity is more calm", "label": 1}, {"captions": ["a person is 
burping while a girl speaks", "paper folding and crinkling"], "sample_ids": ["vdoxuJn9lTc", "zPpG3RD8lSs"], "start_seconds": ["40", "20"], "properties": ["person, burp, girl", "paper, fold, crinkle"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a child speaks followed by a burp", "the wind blows and a mouse clicks "], "question": "which is not a person", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "some tunes played by whistling"], "sample_ids": ["xfudFO976zE", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["animal, bleats, cry", "tune, play, whistling"], "captions_pred_video": ["footage is blurry and shaky", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a toilet flushes and water drains", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["sfAvvZwdLCY", "w5W5Kqtc8E"], "start_seconds": ["20", 
"100"], "properties": ["water drains, flushes, water", "wind, blow, vehicle"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["an engine runs and a man speaks", "an emergency vehicle engine runs then a horn blows and siren sounds"], "sample_ids": ["yT5WfYMRr-U", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["engine, run, man", "engine, horn, siren"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn and siren", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "water splashes as an animal walks through"], "sample_ids": ["ylpYOorfH4o", "w1ir-sZ3Im8"], "start_seconds": ["410", "90"], "properties": ["engine, running, wind", "animal, water, splashes"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and an engine is revving", "water splashes and gurgles as people 
speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "some tunes played by whistling"], "sample_ids": ["uYT5gxnyMWM", "u6BnG6YZqJ4"], "start_seconds": ["50", "0"], "properties": ["person, spray, yell", "tune, play, whistling"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "people speak as gunfire rings out"], "sample_ids": ["tDVADusiIoc", "wqTCwqVRDlk"], "start_seconds": ["60", "80"], "properties": ["man, radio, blows", "gunfire, ring, speak"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war zone", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "a car accelerates and wind blows"], "sample_ids": ["vs65y4qmyBE", "u0TrcHhkPQ"], "start_seconds": ["340", "20"], "properties": ["engine, run, man", "accelerates, wind, blows"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a helicopter engine runs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["t5ZbXbniOWk", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["engine, helicopter, run", "female, spraying, scream"], "captions_pred_video": ["the image is 
loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["birds vocalize and a man speaks", "an infant crying as a woman laughs"], "sample_ids": ["v0wPrLBI3hg", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["vocalize, bird, speak", "a, laugh, infant"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["a person snoring", "birds chirp and an owl hoots before a man speaks briefly"], "sample_ids": ["t8tv5YRMJUg", "wRBHTgrbiwg"], "start_seconds": ["0", "50"], "properties": ["a person, snore, loud", "bird, owl, speak"], "captions_pred_video": ["of a man getting his face licked by another man", "of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a person sniffs and breathes heavily", "birds are chirping and insects are buzzing"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "dishes cling together then a man begins to speak"], "sample_ids": ["ugHJF0hfYkg", "sQGXqGcwOTc"], "start_seconds": ["10", "3"], "properties": ["engine, running, continuously", "cling, speak, dishes"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a helicopter is flying overhead ", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a car speeding up in the distance"], 
"sample_ids": ["rwtmaKiCcQU", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["nozzle, depressed, spray can", "distance, car, speed"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", null], "captions_pred_audio": ["spraying and people speaking", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a man talks as several small engines run", "someone snores nearby"], "sample_ids": ["u9A6VZQCZpU", "spJCm8tD9Zo"], "start_seconds": ["30", "90"], "properties": ["a, man, talk", "someone snores, nearby, someone"], "captions_pred_video": [null, "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["uiItxDsDMFI", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["wood, piece, saw", "wind, blow, vehicle"], "captions_pred_video": ["a man cutting a log with an axe in the woods", null], "captions_pred_audio": ["a saw is being used with background noise ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is not being sawed", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a diesel truck engine runs continuously"], "sample_ids": ["uYT5gxnyMWM", "sZvwOuuPGP0"], "start_seconds": ["50", "50"], "properties": ["a, scream, girl", "engine, diesel, truck"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a bulldozer clearing a road in a forest stock footage and royalty-free videos"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a medium engine is running "], "question": "which entity is a vehicle", "label": 1}, {"captions": 
["loud clanking and banging with brief male speech", "water pouring and bubbling"], "sample_ids": ["sWZzXuWYY", "uyRfq-jKPpo"], "start_seconds": ["420", "50"], "properties": ["male, speech, banging", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "water is running from a faucet"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "a horn rings out as a machine runs by"], "sample_ids": ["xyx6eNVEYRY", "slZLHwNbbt4"], "start_seconds": ["380", "300"], "properties": ["loud, engine, muffles", "a, horn, run"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["x6ijhqRY38s", "w34HjHr6gAY"], "start_seconds": ["250", "30"], "properties": ["bowl, silverware, man", "beeps, hit, woman"], 
"captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a beep sounds followed by a child speaking"], "question": "which entity is about a woman talking?", "label": 1}, {"captions": ["ticking continues without interruption", "a man speaks uses a drill"], "sample_ids": ["v-g-j2uTByM", "x5eIC7S0fbg"], "start_seconds": ["30", "60"], "properties": ["ticking, continuous, clock", "A man is speaking, uses a drill, and is a tool"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "a person in surgical gloves is using a needle to remove a small object from a tooth"], "captions_pred_audio": ["a clock is ticking loudly", "a man is speaking and using a power tool "], "question": "which entity is a tool", "label": 1}, {"captions": ["birds tweet and squawk", "a person snores loudly multiple times at a close distance"], "sample_ids": ["w1mlz3Pe4fU", "sSMl2vc3ek"], "start_seconds": ["300", "20"], "properties": ["squawk, tweet, scream", "loud, multiple, distance"], "captions_pred_video": ["of a bird in a cage", null], "captions_pred_audio": ["birds are chirping and singing", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["birds coo incessantly", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["yZrFNS7GFBQ", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], 
"properties": ["coo, bird, incessant", "sheep, baa, birds"], "captions_pred_video": ["of the bird in the cage", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["an owl hoots in the background ", "a goat bleats and birds chirp"], "question": "which entity is a sheep?", "label": 1}, {"captions": ["people speak softly as food sizzles", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["yhQ2Lg-7qDY", "su6FAOcOA8c"], "start_seconds": ["130", "4"], "properties": ["food, sizzle, speak", "engine, idle, woman"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a woman is speaking and a subway train is moving "], "question": "which entity is about a bus engine?", "label": 1}, {"captions": ["running water in a faucet with some clinks", "a series of light horn beeps is followed by a loud steam whistle"], "sample_ids": ["zNRChLjqcU", "wnpJndXuxLc"], "start_seconds": ["220", "50"], "properties": ["water, faucet, run", "beeps, loud, whistle"], "captions_pred_video": [null, "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["water is running from a faucet into a sink", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "a stream of water runs briefly"], "sample_ids": ["u--KhUW8l1Y", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["engine, sound, horn", "stream, water, run"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a car is driving on a wet road "], "question": "which entity is a stream of 
water?", "label": 1}, {"captions": ["leaves rustle while man speaks", "a infant makes noise and is excited"], "sample_ids": ["zOZleIRqZm4", "wIJK3-5y0kA"], "start_seconds": ["80", "30"], "properties": ["leaves, rustle, speak", "noise, excited, infant"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a baby cries and a woman speaks"], "question": "which is quieter", "label": 0}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["uEU-Hg5MTN8", "zl9Dqx-j7q4"], "start_seconds": ["27", "6"], "properties": ["a woman, laughs, animal", "engine, laugh, loud"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["several insects fly while two men talk", "water pouring and bubbling"], "sample_ids": ["s-T9OVOiMLo", "uyRfq-jKPpo"], "start_seconds": ["330", "50"], "properties": ["several, fly, men", "water, bubbles, pouring"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube 
how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["sWZzXuWYY", "sQGXqGcwOTc"], "start_seconds": ["420", "3"], "properties": ["male, clanks, thumps", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "mechanisms are operating and water is splashing "], "question": "which entity is about a machine?", "label": 0}, {"captions": ["a door opens and closes", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vBHyYJ8pL0", "zj2R0XoFr5k"], "start_seconds": ["2", "50"], "properties": ["open, close, door", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is moving", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "a propeller rotates loudly and intensely"], "sample_ids": ["x6ijhqRY38s", "ugHJF0hfYkg"], "start_seconds": ["250", "10"], "properties": ["something metal, glass, hit", "loud, intense, propeller"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a helicopter is flying overhead "], "question": "which is louder", 
"label": 1}, {"captions": ["an aircraft engine runs as people speak", "vehicles pass by on a roadway"], "sample_ids": ["wTjoRj1se3U", "tgbONvsP47Y"], "start_seconds": ["390", "0"], "properties": ["engine, run, people", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a jet engine is running and people are talking", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["water splashes as an animal walks through", "a car speeding up in the distance"], "sample_ids": ["w1ir-sZ3Im8", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["animal, water, splashes", "distance, car, speed"], "captions_pred_video": ["footage of a group of people riding horses through a river", null], "captions_pred_audio": ["water splashes and gurgles as people speak", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yeFvk9x0wWI", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["chirp, twitter, clatter", "a woman, something, fried"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "paper is crumpling consistently"], "sample_ids": ["wSVhSdj0F0", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["horn honks, keys jingle, slam", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person 
holding a pair of scissors"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["multiple insects buzz over rustling wind", "a stream of water flows as people talk and wind blows"], "sample_ids": ["tMJne1a4AFI", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["wind, buzz, rustling", "stream, water, flow"], "captions_pred_video": ["a swarm of bees on the ground", "footage is blurry and out of focus"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaking with light rustling", "people applaud and hoot and chat quietly"], "sample_ids": ["zOZleIRqZm4", "wwyfGO2J4"], "start_seconds": ["80", "90"], "properties": ["light, rustling, man", "people, applaud, hoot"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a woman speaks over sizzling noise"], "sample_ids": ["xKB8O8LTs6s", "yajyRTUQk3U"], "start_seconds": ["70", "400"], "properties": ["music, gunfire, explosion", "noise, woman, speak"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "- a woman cooking in the kitchen"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a woman is speaking while food is frying in the background"], "question": "which entity has a woman speaking over a sizzling noise?", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "a young female speaks, followed by spraying and 
a female screaming"], "sample_ids": ["sSMl2vc3ek", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["loud, multiple, distance", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks followed by clicks and scraping", "a car speeding up in the distance"], "sample_ids": ["yYJksgsxx5U", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["audio, clicks, scraping", "distance, car, speed"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", null], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["humming of idling and revving engine with a man speaking", "some men converse over an engine running"], "sample_ids": ["wqADXCzngMw", "sCiy7QS1U"], "start_seconds": ["340", "300"], "properties": ["audio, humming, revving", "men, converse, engine"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", null], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video", "label": 1}, {"captions": ["water bubbles and gurgles.", "water pouring and bubbling"], "sample_ids": ["tB7hWb9gTuQ", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["bubbles, gurgles, water", "water, bubbles, pouring"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["water is splashing and gurgling", "water is running from a faucet"], "question": "which entity is a video of water flowing and bubbling?", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a man speaks as a car is passing by"], "sample_ids": ["vr8ZXjEBhMQ", "sK4u5T8hW78"], "start_seconds": ["150", "30"], "properties": ["wind, blow, zoom", "a, car, pass"], "captions_pred_video": ["is taken from a motorcycle's point of view", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a zoom of", "label": 0}, {"captions": ["a motor slows to a stopover traffic noises", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zofjfKhqLk8", "tdWhHV3X25Q"], "start_seconds": ["10", "60"], "properties": ["noise, stop, motor", "applause, audience, yells"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a man is talking to another man on a stage 
in front of a microphone"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["tGcFnX0GHI", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["ring, talk, woman", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a woman talking?", "label": 0}, {"captions": ["two men speak as a buffeting wind blows", "small dogs yip and bark sharply"], "sample_ids": ["y8WEcpOlT3I", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["wind, speak, buffeting", "bark, yip, sharply"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vXlk0lIQBFo", "yajyRTUQk3U"], "start_seconds": ["470", "400"], "properties": ["wind, speak, vocalize", "a woman, something, fried"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "- a woman cooking in the kitchen"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tOSWIURC-4", 
"xfaoyyzw2WU"], "start_seconds": ["0", "180"], "properties": ["engine, work, nearby", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a lawn mower is running ", "an aircraft engine roars and a man speaks "], "question": "which engine is louder", "label": 1}, {"captions": ["wind blowing followed by a zoom", "an infant crying as a woman laughs"], "sample_ids": ["vr8ZXjEBhMQ", "xhmRY9yhC7c"], "start_seconds": ["150", "20"], "properties": ["wind, blow, zoom", "a, laugh, infant"], "captions_pred_video": ["is taken from a motorcycle's point of view", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["wfHeoPDLMaM", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["quacking, squawking, ducks", "sheep, baa, birds"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage of a goat in 
a pen behind a wooden fence"], "captions_pred_audio": ["ducks are quacking", "a goat bleats and birds chirp"], "question": "which entity is a single animal", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "wind blows as people chatter quietly"], "sample_ids": ["sU53zg9Jp7s", "xBxDz0CFVn0"], "start_seconds": ["380", "30"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "wind, chatter, people"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "footage is blurry and out of focus"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "wind blowing followed by a zoom"], "sample_ids": ["vbpKkWvfOu4", "vr8ZXjEBhMQ"], "start_seconds": ["560", "150"], "properties": ["a, man, speaks", "wind, blow, zoom"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["birds chirp then an animal grunts", "people cheer as a vehicle engine revs"], "sample_ids": ["tDlysoZiA1I", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["animal, grunt, chirp", "engine revs, vehicle, people"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["birds are chirping and a rooster is 
crowing ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["x5cuQjOdM3E", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["cat, talk, meow", "engine, laugh, loud"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a cat meows and a woman speaks", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a person snoring", "wind blows as people chatter quietly"], "sample_ids": ["t8tv5YRMJUg", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["a person, snore, loud", "wind, chatter, people"], "captions_pred_video": ["of a man getting his face licked by another man", "footage is blurry and out of focus"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "motors rev and run loudly as a person laughs"], "sample_ids": ["vJ7JPEFhyLA", "zl9Dqx-j7q4"], "start_seconds": ["16", "6"], "properties": ["three men, wind, flow", "motors rev, laugh, loudly"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a jet engine roars "], "question": "which entity is more quiet", "label": 0}, {"captions": ["some clanking with distant murmuring", "motors rev and run loudly as a person laughs"], "sample_ids": ["uMTTDZ2mb4", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["clanking, murmuring, distant", "motors rev, laugh, loudly"], 
"captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a jet engine roars "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a girl talking, laughing and sneezing noise", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["y4tPJXBKDig", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["a, noise, talk", "music, gunfire, explosion"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a train horn sounds as a railroad passing bell rings", "a person speaks briefly"], "sample_ids": ["zgUgkpk78xU", "zOZleIRqZm4"], "start_seconds": ["70", "80"], "properties": ["horn, bell, train", "person, talk, brief"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking with crickets chirping in the background"], "question": "which entity is talking", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "some liquid flows while a woman laughs and man talks"], "sample_ids": ["vJvryTwuAV8", "vddP56-ogds"], "start_seconds": ["16", "30"], "properties": ["audience, cheer, man", "liquid, laughs, man"], 
"captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", null], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "water is running and gurgling and a man is speaking"], "question": "which entity has a man speaking to an audience?", "label": 0}, {"captions": ["motors runs briefly and tires screech", "multiple people speak and children yell while water gurgles"], "sample_ids": ["yRx9txMcBl0", "vb1fPSDI4c"], "start_seconds": ["40", "30"], "properties": ["motors, tires, screech", "multiple, people, yell"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "small dogs yip and bark sharply"], "sample_ids": ["rqu8iB22IY", "v-wcQf4BDY0"], "start_seconds": ["5", "120"], "properties": ["sound, repeats, laugh", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a dog barks and growls"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a woman talks 
while something is fried and objects are tapped"], "sample_ids": ["vbr9mHKc8WM", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["noise, loudness, engine", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["an engine is idling", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["an infant crying frantically", "a man speaks as a motor runs in the background"], "sample_ids": ["zwOBqeFTgiU", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["cry, infant, frantically", "background, motor, run"], "captions_pred_video": ["of the baby crying in the car seat", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a baby cries loudly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is silent", "label": 1}, {"captions": ["ticking continues without interruption", "a man speaks as a motor runs in the background"], "sample_ids": ["v-g-j2uTByM", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["ticking, continuous, clock", "background, motor, run"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a clock is ticking loudly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a clock", "label": 0}, {"captions": ["men speak and a nozzle sprays liquid", "water flows and trickles"], "sample_ids": ["wRV8yMk886E", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["liquid, spray, nozzle", "water, flow, trickle"], "captions_pred_video": ["two cars are parked in a parking lot at night", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man speaks followed by a loud burst", 
"water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["birds vocalize and a man speaks", "an insect buzzes around continuously"], "sample_ids": ["v0wPrLBI3hg", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["vocalize, bird, speak", "buzzes, continuously, insect"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a fly is buzzing around a microphone "], "question": "which entity is not a bird?", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["xERFUeZONz8", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["ring, approach, traffic", "a woman, a television program, a bird"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["an emergency vehicle siren blares", "a woman is speaking and a dog is whimpering"], "question": "which entity is more quiet", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "waves crash against a shoreline and wind blows"], "sample_ids": ["vJvryTwuAV8", "zdYdyF9-m8U"], "start_seconds": ["16", "7"], "properties": ["audience, cheer, man", "wind, crash, shoreline"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "a person kayaking in the ocean near a cliff"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "waves crash and wind blows "], "question": "which entity is more active", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "people applaud and hoot and chat quietly"], "sample_ids": ["sQGXqGcwOTc", 
"wwyfGO2J4"], "start_seconds": ["3", "90"], "properties": ["cling, speak, dishes", "people, applaud, hoot"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", null], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be a performance", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "wind blowing followed by a zoom"], "sample_ids": ["wqN6IIHw3po", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["rain, surface, fall", "wind, blow, zoom"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and water is splashing", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a zoom?", "label": 0}, {"captions": ["material crumbles into a microphone", "water flows as men speak and yell"], "sample_ids": ["vofpvUo6NAw", "vJ7JPEFhyLA"], "start_seconds": ["220", "16"], "properties": ["material, crumbles, microphone", "water, flow, men"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "multiple people speak and children yell while water gurgles"], "sample_ids": ["ylpYOorfH4o", "vb1fPSDI4c"], "start_seconds": ["410", "30"], "properties": ["motor, run, steady", "multiple, people, yell"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube 
how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vcmWSmvti8", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["music, man, fire", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about to fly", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wqZ135Ssz0", "xBxDz0CFVn0"], "start_seconds": ["60", "30"], "properties": ["man, woman, squawks", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is accompanied by a man and woman speaking", "label": 0}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "someone whistles a tune"], "sample_ids": 
["wyllXV6PjKo", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["a baby, a woman, a man", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman speaks and a baby cries", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["paper is crumpling consistently", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["v5cSxLaHADY", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "a woman, a television program, a bird"], "captions_pred_video": ["footage of the person holding a pair of scissors", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["paper is crumpled and crinkled", "a woman is speaking and a dog is whimpering"], "question": "which entity is more quiet", "label": 1}, {"captions": ["male speech with light ticking", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["xO-Q2BlIIPU", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["male, speech, ticking", "applause, audience, yells"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "a telephone rings followed by a woman talking"], "sample_ids": ["vb1fPSDI4c", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["multiple, people, yell", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a crowd of people are talking and laughing", "a dial tone sounds followed by a woman speaking"], "question": "which 
entity is a conversation", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "winds blows roughly as a vehicle races past"], "sample_ids": ["ujMt0-D-x2k", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["snoring, rhythmical, nearby", "wind, blows, vehicle"], "captions_pred_video": ["of the dog playing with a toy on the floor", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a person is snoring loudly", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a man speaks as a car is passing by"], "sample_ids": ["s4Uz1Ffgo04", "sK4u5T8hW78"], "start_seconds": ["100", "30"], "properties": ["roars, background, people speaking", "a, car, pass"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is quieter", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tIY7qOV3rEM", "w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "wind, blow, vehicle"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat 
is meowing", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["dishes cling together then a man begins to speak", "an animal growls followed by birds chirping"], "sample_ids": ["sQGXqGcwOTc", "y2ZBGpgbhHM"], "start_seconds": ["3", "30"], "properties": ["cling, speak, dishes", "animal, growl, bird"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", null], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "birds chirping and a dog panting"], "question": "which entity is more likely to be a scream", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["xBxDz0CFVn0", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["stream, water, flow", "a woman, laughs, animal"], "captions_pred_video": ["footage is blurry and out of focus", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "a man speaks as a motor runs in the background"], "sample_ids": ["vK93VuO0yNc", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["male voice, bus, rumble", "background, motor, run"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["small dogs yip and bark sharply", "a child speaks in closed space"], "sample_ids": ["v-wcQf4BDY0", 
"yW6FWLSLkx4"], "start_seconds": ["120", "40"], "properties": ["bark, yip, sharply", "child, space, speak"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a dog barks and growls", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tGcFnX0GHI", "vb1fPSDI4c"], "start_seconds": ["0", "30"], "properties": ["ring, talk, woman", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["sZPuqDgX2V0", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["engine, accelerate, intercom", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird nearby?", "label": 1}, {"captions": ["an insect buzzes around continuously", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["v25l1jef3JY", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["buzzes, continuously, insect", "a, scream, girl"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a fly is buzzing around 
a microphone ", "a woman is speaking and a baby is crying"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["slZLHwNbbt4", "ziUT9IFTkjg"], "start_seconds": ["300", "10"], "properties": ["clap, distance, horn", "background, birds, rustling"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", null], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an animal quacks rapidly", "a car speeding up in the distance"], "sample_ids": ["vh30P49Po6s", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["animal, quacks, rapidly", "distance, car, speed"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "paper folding and crinkling"], "sample_ids": ["sOa7g-44Dag", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["background, man, spray", "paper, fold, crinkle"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a 
valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of a process", "label": 1}, {"captions": ["a child speaks in closed space", "a stream of water flows as people talk and wind blows"], "sample_ids": ["yW6FWLSLkx4", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["child, space, speak", "stream, water, flow"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "wind blowing and birds chirping with the distant cooing of a large bird"], "sample_ids": ["w5W5Kqtc8E", "wRBHTgrbiwg"], "start_seconds": ["100", "50"], "properties": ["wind, blow, vehicle", "birds, chirp, cooing"], "captions_pred_video": [null, "of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "birds are chirping and insects are buzzing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a toilet flushes and water drains", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sfAvvZwdLCY", "uZesmtKZGSw"], "start_seconds": ["20", "250"], "properties": ["water drains, flushes, water", "men, 
talk, cars"], "captions_pred_video": ["footage of the toilet in the bathroom", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is a source of water", "label": 0}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["uZesmtKZGSw", "zFjIWfSD-4"], "start_seconds": ["250", "410"], "properties": ["men, talk, cars", "People, motor, brakes"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", null], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has more cars", "label": 0}, {"captions": ["a man speaks as bees buzz and birds chirp", "a stream of water flows as people talk and wind 
blows"], "sample_ids": ["t25U-v4k4ts", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["bees buzz, birds chirp, man speaks", "stream, water, flow"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a man is speaking with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a male speaks over some small clicks", "birds chirp and objects are moved around"], "sample_ids": ["uXxVebHsGZ8", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["male, clicks, speak", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "children speak and play together"], "sample_ids": ["se87d6yxEOA", "yVVP8XvWJTo"], "start_seconds": ["10", "260"], "properties": ["run, whistle, pass", "children, speak, play"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "footage of a playground at a school or daycare center"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "children are speaking and breathing with background noise "], "question": "which entity is moving", "label": 0}, {"captions": ["an aircraft engine runs as wind blows heavily", "a duck quacks continuously"], "sample_ids": ["xjvTpk2Zpr8", "vh30P49Po6s"], "start_seconds": ["70", "30"], "properties": ["engine, run, wind", "quacks, continuously, duck"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": 
["a jet engine roars and wind blows ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a duck quacks continuously"], "sample_ids": ["tK4VlLsNxak", "vh30P49Po6s"], "start_seconds": ["120", "30"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "quacks, continuously, duck"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a beep occurs briefly", "someone is typing on a computer keyboard"], "sample_ids": ["xtWeJ56-U-g", "v0x1odnXtP0"], "start_seconds": ["20", "210"], "properties": ["beep, occur, briefly", "keyboard, type, computer"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "how to make money on youtube in spanish"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a person is typing on a keyboard"], "question": "which is not a type of computer", "label": 0}, {"captions": ["loud clanking and banging with brief male speech", "paper is crumpling consistently"], "sample_ids": ["sWZzXuWYY", "v5cSxLaHADY"], "start_seconds": ["420", "0"], "properties": ["male, speech, banging", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": 
["birds vocalize and chirp continuously", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["w1mlz3Pe4fU", "wz7N8YRy74I"], "start_seconds": ["300", "30"], "properties": ["vocalize, chirp, continuously", "rooster, crow, background, men"], "captions_pred_video": ["of a bird in a cage", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["birds are chirping and singing", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a rooster?", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "a woman talks while a baby cries and a man whispers"], "sample_ids": ["y1saVTXsKwc", "smDKStoHBJo"], "start_seconds": ["80", "0"], "properties": ["a, dog, talk", "a, talk, baby, cry"], "captions_pred_video": ["a dog playing with a pink ball", "a man holding a crying baby in his arms"], "captions_pred_audio": ["a dog barks and a man speaks", "a baby is crying and a woman is speaking"], "question": "which entity has a dog?", "label": 0}, {"captions": ["a person speaks over rustling leaves", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zOZleIRqZm4", "yajyRTUQk3U"], "start_seconds": ["80", "400"], "properties": ["rustling, leaves, person", "a woman, something, fried"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "a muffled toilet flushes and the water drains"], "sample_ids": ["vzceMbklWc", "sfAvvZwdLCY"], "start_seconds": ["180", "20"], "properties": ["water, faucet, sink", "flushes, drains, water"], "captions_pred_video": [null, "footage of the 
toilet in the bathroom"], "captions_pred_audio": ["water is running and a man is speaking", "a toilet is flushed"], "question": "which entity has water running in it", "label": 0}, {"captions": ["food is frying while a woman speaks", "a woman speaks happily and an animal chirps"], "sample_ids": ["yhQ2Lg-7qDY", "uWAAAL4CIoc"], "start_seconds": ["130", "0"], "properties": ["food, woman, speak", "a woman, chirps, animal"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a woman is speaking and a dog is barking "], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a infant makes noise and is excited", "a loud engine muffles a man as he speaks"], "sample_ids": ["wIJK3-5y0kA", "xyx6eNVEYRY"], "start_seconds": ["30", "380"], "properties": ["noise, excited, infant", "loud, engine, muffles"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "footage of a helicopter landing on a runway at an airport"], "captions_pred_audio": ["a baby cries and a woman speaks", "an aircraft engine is running and a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "an infant crying as a woman laughs"], "sample_ids": ["y2ZBGpgbhHM", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["animal, growl, bird", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["birds chirping and a dog panting", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "an engine runs loudly"], "sample_ids": ["vVhthZ45k3Y", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["cat, purr, hiss", "loud, engine, run"], "captions_pred_video": ["footage is blurry and out of focus", "footage is 
blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tDlysoZiA1I", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["animal, grunt, multiple", "engine, laugh, loud"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a car speeding up in the distance"], "sample_ids": ["su6FAOcOA8c", "u0TrcHhkPQ"], "start_seconds": ["4", "20"], "properties": ["engine, run, woman", "distance, car, speed"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", null], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a race car accelerates and revs its engine "], "question": "which car is speeding up in the distance", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yaln9y8I7ms", "uEU-Hg5MTN8"], "start_seconds": ["230", "27"], "properties": ["female, flushes, toilet", "a woman, laughs, animal"], "captions_pred_video": ["footage is blurry and out of focus", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["an audience gives applause", "a telephone rings followed by a woman talking"], "sample_ids": ["x6iCUDmRpKQ", "tGcFnX0GHI"], "start_seconds": ["38", "0"], "properties": 
["applause, audience, give", "ring, talk, woman"], "captions_pred_video": ["a black background with the moon and stars in the sky", null], "captions_pred_audio": ["a group of people are clapping and cheering", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a dog barks and whimpers"], "sample_ids": ["ukxt9I7eMMg", "sShpyu2l4YQ"], "start_seconds": ["30", "0"], "properties": ["continuous, woman, speaking", "barks, whimpers, dog"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "the puppies are playing with a toy"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a dog is barking and growling"], "question": "which entity is a dog?", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["skd2PphS6oI", "uZesmtKZGSw"], "start_seconds": ["190", "250"], "properties": ["ring, bird, vocalize", "men, talk, cars"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": 
["a person burps loudly for a long time nearby", "a propeller rotates loudly and intensely"], "sample_ids": ["vf44CgrjT0A", "ugHJF0hfYkg"], "start_seconds": ["20", "10"], "properties": ["loud, long, person", "loud, intense, propeller"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a loud burp", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a dog barks and whimpers", "a piece of wood is being placed down and sawed"], "sample_ids": ["sShpyu2l4YQ", "uiItxDsDMFI"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "wood, piece, saw"], "captions_pred_video": ["the puppies are playing with a toy", "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["a dog is barking and growling", "a saw is being used with background noise "], "question": "which entity is being cut", "label": 1}, {"captions": ["some men converse over an engine running", "water pouring and bubbling"], "sample_ids": ["sCiy7QS1U", "uyRfq-jKPpo"], "start_seconds": ["300", "50"], "properties": ["men, converse, engine", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an 
afro pu"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "water is running from a faucet"], "question": "which entity is more likely to be in a kitchen", "label": 1}, {"captions": ["a man talks while vehicles pass by", "a man speaks with another voice speaking in the background"], "sample_ids": ["sK4u5T8hW78", "u21-Z5gJCB8"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "background, voice, man"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man talking while vehicles pass by?", "label": 0}, {"captions": ["wind blows strongly", "an infant crying as a woman laughs"], "sample_ids": ["w8uLijTqtlU", "xhmRY9yhC7c"], "start_seconds": ["70", "20"], "properties": ["wind, blows, strongly", "a, laugh, infant"], "captions_pred_video": ["footage is blurry and shaky", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["the wind is blowing strongly", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["yLy-WycbVVE", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["background, people, talk", "two men, woman, birds"], "captions_pred_video": ["a soccer field in a stadium with 
yellow and red seats", null], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has a more natural background", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "vehicles pass by on a roadway"], "sample_ids": ["yDoT73BWsdA", "tgbONvsP47Y"], "start_seconds": ["10", "0"], "properties": ["engine revs, tires squeal, vehicle", "pass, vehicle, roadway"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a car is driving on the road "], "question": "which vehicle is moving", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "a woman speaks happily and an animal chirps"], "sample_ids": ["uiS58TNyUiw", "uWAAAL4CIoc"], "start_seconds": ["430", "0"], "properties": ["audio, man, speaking", "a woman, chirps, animal"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a woman is speaking and a dog is barking "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["speaking following by laughing and clapping", "people speak as gunfire rings out"], "sample_ids": ["u2f5NpsoHBg", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["person, laugh, clap", "gunfire, ring, speak"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a man is speaking and a gun is fired"], "question": "which entity shows a person speaking and then laughing and clapping?", "label": 0}, {"captions": ["a vehicle accelerates and squeals tires", "a series of light 
horn beeps is followed by a loud steam whistle"], "sample_ids": ["yRx9txMcBl0", "wnpJndXuxLc"], "start_seconds": ["40", "50"], "properties": ["accelerates, tires, squeals", "beeps, loud, whistle"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a door opens and birds chirp", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["yeFvk9x0wWI", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["door, open, birds", "a, scream, girl"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vYkA3cfXp5Q", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["engine, accelerate, idle", "two men, woman, birds"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", null], "captions_pred_audio": ["an 
engine is idling", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a video of a vehicle engine accelerating then running on idle?", "label": 0}, {"captions": ["someone is typing on a computer keyboard", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["v0x1odnXtP0", "w5W5Kqtc8E"], "start_seconds": ["210", "100"], "properties": ["keyboard, type, computer", "wind, blow, vehicle"], "captions_pred_video": ["how to make money on youtube in spanish", null], "captions_pred_audio": ["a person is typing on a keyboard", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a helicopter engine idles continuously", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["ugHJF0hfYkg", "yajyRTUQk3U"], "start_seconds": ["10", "400"], "properties": ["engine, idle, continuously", "a woman, something, fried"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "a man speaks as a car is passing by"], "sample_ids": ["soTOh3zYJfY", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["vehicle, skid, tires", "a, car, pass"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which vehicle is skidding and squealing tires", "label": 0}, {"captions": ["a baby cries and a woman moans", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["smDKStoHBJo", "yks4cLgIDMc"], "start_seconds": ["0", "170"], "properties": ["a, cry, woman", "background, speaking, child"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background?", "label": 1}, {"captions": ["a child babbles as a woman speaks", "an infant crying as a woman laughs"], "sample_ids": ["wEBlkGWVWwE", "xhmRY9yhC7c"], "start_seconds": ["260", "20"], "properties": ["a, babble, woman", "a, laugh, infant"], "captions_pred_video": ["shows a person writing on the whiteboard", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a baby cries and a woman speaks"], "question": "which entity is a child", "label": 0}, {"captions": ["a person whistles and clicks a mouse", "vehicle tires screech and a man speaks before a car door opens"], "sample_ids": ["zCrAfDfv6-A", "sxYkFKFIZD0"], "start_seconds": ["30", "20"], "properties": ["person, mouse, click", "screech, man, door"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 
audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2"], "captions_pred_audio": ["a person whistles a song", "a man is speaking while a car is revving and accelerating with a squeal in the background "], "question": "which entity is about a car door opening?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "an airplane engine runs"], "sample_ids": ["ugHJF0hfYkg", "yVPZ2MNWpms"], "start_seconds": ["10", "0"], "properties": ["engine, running, continuously", "engine, airplane, runs"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car is driving by on the road "], "question": "which entity has a running engine", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["sLUnaPT5gM8", "ukg5L09Wpvo"], "start_seconds": ["0", "150"], "properties": ["loud, laughter, intermittent", "clickety-clack, train, whistle"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a train blows its whistle and blows its horn "], "question": "which entity is continuous", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["xfaoyyzw2WU", "uEU-Hg5MTN8"], "start_seconds": ["180", "27"], "properties": ["loud, jet engine, roar", "animal, grunts, snorts"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a woman is speaking and a baby is crying"], 
"question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "wind blowing followed by a zoom"], "sample_ids": ["zALy31PjDl0", "vr8ZXjEBhMQ"], "start_seconds": ["21", "150"], "properties": ["a man, a vehicle, a horn", "wind, blow, zoom"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "wind blows and a chainsaw cuts through wood "], "question": "which entity is about a man blowing a vehicle horn?", "label": 0}, {"captions": ["a man talks as something metal hits against and glass is set down", "small dogs yip and bark sharply"], "sample_ids": ["x6ijhqRY38s", "v-wcQf4BDY0"], "start_seconds": ["250", "120"], "properties": ["something metal, glass, hit", "bark, yip, sharply"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["w5W5Kqtc8E", "uZesmtKZGSw"], "start_seconds": ["100", "250"], "properties": ["wind, engine, scream", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about cars?", "label": 1}, {"captions": ["a stream runs then someone speaks", "a man speaks as a car is passing by"], "sample_ids": ["wbHTKEJZyhc", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["stream, run, someone", "a, car, pass"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 
6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking to a car passing by?", "label": 1}, {"captions": ["a stream runs then someone speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wbHTKEJZyhc", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["stream, run, someone", "music, gunfire, explosion"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more action", "label": 1}, 
{"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "people applaud and hoot and chat quietly"], "sample_ids": ["wsHBIgzs9Fs", "wwyfGO2J4"], "start_seconds": ["50", "90"], "properties": ["horn, continuous, buzzing", "people, applaud, hoot"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", null], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a mechanical buzzing getting louder"], "sample_ids": ["vbZ-0lGPneg", "sEprKHm8Sj8"], "start_seconds": ["30", "90"], "properties": ["a woman, a television program, a bird", "noise, loud, buzzing"], "captions_pred_video": ["of a man holding a baby duck in his hands", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a race car accelerates and revs its engine "], "question": "which entity is a noise", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "some tunes played by whistling"], "sample_ids": ["uWAAAL4CIoc", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["a woman, chirps, animal", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a woman speaks with water running"], "sample_ids": 
["sShpyu2l4YQ", "wTideSjRFS0"], "start_seconds": ["0", "30"], "properties": ["growl, bark, yip", "water, running, woman"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking while water is running in the background"], "question": "which entity is more calm", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "a dog barks and whimpers"], "sample_ids": ["xM4joTqDVp4", "sShpyu2l4YQ"], "start_seconds": ["160", "0"], "properties": ["background, chirp, birds", "barks, whimpers, dog"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "the puppies are playing with a toy"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a dog is barking and growling"], "question": "which entity is more active", "label": 1}, {"captions": ["an engine runs loudly", "a horn rings out as a machine runs by"], "sample_ids": ["vqZuVbG6-HI", "slZLHwNbbt4"], "start_seconds": ["130", "300"], "properties": ["loud, engine, run", "a, horn, run"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a car speeding up in the distance", "a woman speaks with water running"], "sample_ids": ["u0TrcHhkPQ", "wTideSjRFS0"], "start_seconds": ["20", "30"], "properties": ["distance, car, speed", "water, running, woman"], "captions_pred_video": [null, "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking while water is running in 
the background"], "question": "which entity is moving at a slower speed", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "wind blows as people chatter quietly"], "sample_ids": ["y2ZBGpgbhHM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["birds, tweet, pant", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "a duck quacks continuously"], "sample_ids": ["vzceMbklWc", "vh30P49Po6s"], "start_seconds": ["180", "30"], "properties": ["water, faucet, sink", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["water is running and a man is speaking", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "people speak as gunfire rings out"], "sample_ids": ["smDKStoHBJo", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["a, talk, baby, cry", "gunfire, ring, speak"], "captions_pred_video": ["a man holding a crying baby in his arms", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "an infant crying as a woman laughs"], "sample_ids": ["sG7TyPnFDR0", "xhmRY9yhC7c"], "start_seconds": ["180", "20"], "properties": ["beeps, machine, smoke alarm", "a, laugh, infant"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "of a baby crying in a 
baby bouncer"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["speaking following by laughing and clapping", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["u2f5NpsoHBg", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["person, laugh, clap", "a woman, a television program, a bird"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird nearby?", "label": 1}, {"captions": ["goats bleat and people speak", "people speak as gunfire rings out"], "sample_ids": ["z5iUE5h0EPs", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["goats bleat, people speak, language", "gunfire, ring, speak"], "captions_pred_video": ["of the goat in the barn", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a goat bleats and a man speaks", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a man is filing a hard object"], "sample_ids": ["vzxHnu-SFEw", "vveS8HT7Uog"], "start_seconds": ["80", "100"], "properties": ["two objects, woman, speak", "a man, hard, object"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage is of a workbench with various tools on it including a hammer and a screwdriver"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is filing and speaking with background noise and breathing "], "question": "which object is harder to file", "label": 0}, {"captions": ["a man speaks on a radio as wind blows", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tDVADusiIoc", "yajyRTUQk3U"], "start_seconds": ["60", "400"], "properties": ["man, radio, blows", "a woman, something, fried"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["xSKJGCItUWE", "xKB8O8LTs6s"], "start_seconds": ["10", "70"], "properties": ["engine, work, child", "music, gunfire, explosion"], "captions_pred_video": ["footage of the helicopter flying in the room", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more 
gunfire", "label": 1}, {"captions": ["a kid speaks followed by music playing", "an infant crying as a woman laughs"], "sample_ids": ["tQWGZLItBXk", "xhmRY9yhC7c"], "start_seconds": ["170", "20"], "properties": ["music, kid, speak", "a, laugh, infant"], "captions_pred_video": ["worms revolution screenshots", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["tDVADusiIoc", "su6FAOcOA8c"], "start_seconds": ["60", "4"], "properties": ["man, radio, blows", "engine, idle, woman"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a motorcycle engine works nearby", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tOSWIURC-4", "vb1fPSDI4c"], "start_seconds": ["0", "30"], "properties": ["engine, work, nearby", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a lawn mower is running ", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a stream of water runs briefly"], "sample_ids": ["vXlk0lIQBFo", "x-PeY8Yb8M4"], "start_seconds": ["470", "300"], "properties": ["wind, speak, vocalize", "stream, water, run"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["wind 
chimes are ringing and people are speaking and laughing ", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "water flows and trickles"], "sample_ids": ["w-4gHptFNuU", "tB7hWb9gTuQ"], "start_seconds": ["21", "30"], "properties": ["engine revs, accelerates, bump", "water, flow, trickle"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a car accelerates and revs its engine ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["w-4gHptFNuU", "xKB8O8LTs6s"], "start_seconds": ["21", "70"], "properties": ["engine revs, accelerates, bump", "music, gunfire, explosion"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a car accelerates and revs its engine ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["someone snores nearby", "people applaud and hoot and chat quietly"], "sample_ids": ["spJCm8tD9Zo", "wwyfGO2J4"], "start_seconds": ["90", "90"], "properties": ["someone snores, nearby, someone", "people, applaud, hoot"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone whistles a song", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sIXTftIuUgw", "sLUnaPT5gM8"], 
"start_seconds": ["90", "0"], "properties": ["someone, song, whistle", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a person whistling a song", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a woman speaks happily and an animal chirps", "small dogs yip and bark sharply"], "sample_ids": ["uWAAAL4CIoc", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["a woman, chirps, animal", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a drill runs and two people laugh", "a duck quacks continuously"], "sample_ids": ["tEE3MpBt1sg", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["two people, laugh, drill", "quacks, continuously, duck"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "paper is crumpling consistently"], "sample_ids": ["uWAAAL4CIoc", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["a woman, chirps, animal", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["electronic beeps occur in a short series", "a vehicle engine 
accelerating then running on idle"], "sample_ids": ["y682ml90jGw", "vYkA3cfXp5Q"], "start_seconds": ["11", "30"], "properties": ["beeps, series, electronic", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a beeping sound is being made ", "an engine is idling"], "question": "which entity is a series of beeps", "label": 0}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "three men talk while wind blows and some liquid flows"], "sample_ids": ["ylpYOorfH4o", "vJ7JPEFhyLA"], "start_seconds": ["410", "16"], "properties": ["motor, run, steady", "three men, wind, flow"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "water flows and trickles"], "sample_ids": ["vbZ-0lGPneg", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["a woman, a television program, a bird", "water, flow, trickle"], "captions_pred_video": ["of a man holding a baby duck in his hands", "the rocks on the beach are 
surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "wind blowing followed by a zoom"], "sample_ids": ["w2JXXIAdUdg", "vr8ZXjEBhMQ"], "start_seconds": ["10", "150"], "properties": ["emits, sleeping, person", "wind, blow, zoom"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a person snoring and a dog whimpering", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "water splashes as an animal walks through"], "sample_ids": ["uOpoD0gGXcs", "w1ir-sZ3Im8"], "start_seconds": ["120", "90"], "properties": ["chirps, woman, bird", "animal, water, splashes"], "captions_pred_video": ["a herd of cows grazing in the field", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["birds are chirping and a man is speaking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "insects humming with a dog barking and small goat bleating"], "sample_ids": ["wyllXV6PjKo", "tIY7qOV3rEM"], "start_seconds": ["30", "0"], "properties": ["a kid, talk, cry", "animal, bark, dog, barking, small, goat, bleating"], "captions_pred_video": [null, "a dog is standing in the middle of a dirt road in the woods"], "captions_pred_audio": ["a woman speaks and a baby cries", "a dog is barking and a cat is meowing"], "question": "which entity has more animals", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "a car revs and accelerates loudly and men and women chatter among 
themselves"], "sample_ids": ["wPz6QRAkEb4", "y8dSeubCNI"], "start_seconds": ["60", "4"], "properties": ["chirps, tweets, song", "men, women, car"], "captions_pred_video": ["a bird in a cage on top of a pole", null], "captions_pred_audio": ["birds are chirping in the background ", "an engine revving and people talking in the background"], "question": "which entity is more quiet", "label": 0}, {"captions": ["race cars go around a track as a man commentates", "pigeons vocalize and birds chirp"], "sample_ids": ["uZesmtKZGSw", "uiS58TNyUiw"], "start_seconds": ["250", "430"], "properties": ["car, track, man", "vocalize, bird, chirp"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["ul60S8TXDA8", "t25U-v4k4ts"], "start_seconds": ["60", "40"], "properties": ["sound, distance, bell", "a, chirps, bird"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a man is speaking and bees are buzzing"], "question": "which entity is a 
bird?", "label": 1}, {"captions": ["a kid speaks followed by music playing", "water flows and trickles"], "sample_ids": ["tQWGZLItBXk", "tB7hWb9gTuQ"], "start_seconds": ["170", "30"], "properties": ["music, kid, speak", "water, flow, trickle"], "captions_pred_video": ["worms revolution screenshots", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a beep repeats multiple times", "a man speaks followed by another man speaking outside"], "sample_ids": ["y682ml90jGw", "viuTg1M-dqg"], "start_seconds": ["11", "30"], "properties": ["beep, repeat, multiple", "two men, speak, follow"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a beeping sound is being made ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a single speaker?", "label": 0}, {"captions": ["a man speaks as music plays before artillery is fired", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vcmWSmvti8", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["music, man, fire", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp as a train approaches", "water pouring and bubbling"], "sample_ids": ["xM4joTqDVp4", "uyRfq-jKPpo"], "start_seconds": ["160", "50"], "properties": ["bird, chirp, train", "water, bubbles, pouring"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's 
smokestack", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["birds are chirping and a train is moving ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["rustling with distant murmuring", "birds chirp and objects are moved around"], "sample_ids": ["wnNNcxAPwGQ", "yPUYU6t3rwo"], "start_seconds": ["0", "370"], "properties": ["sound, distance, rustling", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a yellow truck doing a burnout on a race track", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a crowd of people are talking and laughing while a skateboard rolls by ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["water pouring and bubbling", "a stream of water runs briefly"], "sample_ids": ["uyRfq-jKPpo", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["water, bubbles, pouring", "stream, water, run"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["water is running from a faucet", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a person uses a saw to cut some wood"], "sample_ids": ["yRx9txMcBl0", "sHbXC6na9hg"], "start_seconds": ["40", "0"], "properties": ["accelerates, tires, squeals", "a person, saw, wood"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["a car is revving its engine and skidding ", "an engine is idling and vibrating"], "question": "which entity is stationary", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["xyL9F5VrjkE", "xfaoyyzw2WU"], "start_seconds": ["20", "180"], "properties": ["engine, run, wind", "loud, 
jet engine, roar"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "an aircraft engine roars and a man speaks "], "question": "which engine is louder", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "a vehicle accelerates before a race car idles then accelerates quickly"], "sample_ids": ["yDoT73BWsdA", "sjlVMgdGSK0"], "start_seconds": ["10", "30"], "properties": ["engine revs, tires squeal, vehicle", "accelerates, vehicle, race car"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a car accelerates and revs its engine "], "question": "which vehicle is accelerating", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "bird squawks are accompanied by a man and woman speaking"], "sample_ids": ["weDbePuc-Xc", "wqZ135Ssz0"], "start_seconds": ["40", "60"], "properties": ["music, slaps, human", "man, woman, squawks"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", null], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has a man and woman speaking?", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "a child speaks in closed space"], "sample_ids": ["tIY7qOV3rEM", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "child, space, speak"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "of the doll sitting on the 
bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a clock ticktocks"], "sample_ids": ["vcmWSmvti8", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["music, man, fire", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a helicopter engine runs", "birds chirp and pigeons vocalize while walking around"], "sample_ids": ["t5ZbXbniOWk", "wIvYjuR3nrg"], "start_seconds": ["30", "9"], "properties": ["engine, helicopter, run", "birds, pigeons, vocalize"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "footage of a pigeon sitting on a roof with trees in the background"], "captions_pred_audio": ["a helicopter is flying overhead ", "birds are chirping and cooing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["an insect buzzes around continuously", "a toilet flushes and a female speaks"], "sample_ids": ["v25l1jef3JY", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["buzzes, continuously, insect", "female, flushes, toilet"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "footage is blurry and out of focus"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a toilet flushes and a man speaks"], "question": "which entity is not a person", "label": 0}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "dishes cling together then a man begins to speak"], "sample_ids": ["uPDn2BFTHk", 
"sQGXqGcwOTc"], "start_seconds": ["140", "3"], "properties": ["lady, laugh, baby", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a baby laughs and a woman speaks", "mechanisms are operating and water is splashing "], "question": "which entity is about a baby and a lady?", "label": 0}, {"captions": ["a vehicle engine runs and someone speaks", "paper is crumpling consistently"], "sample_ids": ["zF8yoL0rkbI", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["engine, run, someone", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the traffic on the street at night", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "an insect buzzes around continuously"], "sample_ids": ["zl9Dqx-j7q4", "v25l1jef3JY"], "start_seconds": ["6", "0"], "properties": ["motors rev, laugh, loudly", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a man driving a car in the dark", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a jet engine roars ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "rain falls on a surface as men speak and music plays"], "sample_ids": ["vms5XGTDVQc", "w0xsN8X18Y"], "start_seconds": ["220", "30"], "properties": ["paper, crumpled, crinkled", "music, surface, rain"], "captions_pred_video": ["footage of a woman opening a black bag on a table", null], "captions_pred_audio": ["paper is crumpled and crinkled", "a man is speaking while a motorboat is moving in the background "], "question": "which entity is not a video of rain falling on 
a surface?", "label": 0}, {"captions": ["a toilet flushes and a female speaks", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["yaln9y8I7ms", "ukg5L09Wpvo"], "start_seconds": ["230", "150"], "properties": ["female, flushes, toilet", "clickety-clack, train, whistle"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "water splashes as an animal walks through"], "sample_ids": ["vveS8HT7Uog", "w1ir-sZ3Im8"], "start_seconds": ["100", "90"], "properties": ["a man, objects, speak", "animal, water, splashes"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["loud, continuous burping", "birds chirp and pigeons vocalize while walking around"], "sample_ids": ["y636gklDioE", "wIvYjuR3nrg"], "start_seconds": ["20", "9"], "properties": ["loud, continuous, burping", "birds, pigeons, vocalize"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "footage of a pigeon sitting on a roof with trees in the background"], "captions_pred_audio": ["a person burps loudly several times", "birds are chirping and cooing"], "question": "which entity is not a human", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "paper is crumpling consistently"], "sample_ids": ["s4Uz1Ffgo04", "v5cSxLaHADY"], "start_seconds": ["100", "0"], "properties": 
["water, rushes, vehicle", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a stream of water runs briefly"], "sample_ids": ["u2f5NpsoHBg", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["person, laugh, clap", "stream, water, run"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a consistent ticking pattern", "pigeons vocalize and birds chirp"], "sample_ids": ["sCeWURVHfOM", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["ticking, pattern, clock", "vocalize, bird, chirp"], "captions_pred_video": ["- a close-up view of the clock's inner workings", "of the pigeon in the cage"], "captions_pred_audio": ["ticking of a clock", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["dogs bark as an engine runs and a person whistles", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["zY3icUyMdh8", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["dog, bark, engine", "animal, grunts, snorts"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a woman is speaking and a baby is crying"], "question": "which entity is more 
playful", "label": 1}, {"captions": ["small dogs yip and bark sharply", "a saw finishes running as metal clings in the background"], "sample_ids": ["v-wcQf4BDY0", "zofjfKhqLk8"], "start_seconds": ["120", "10"], "properties": ["bark, yip, sharply", "background, metal, clings"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "footage of a man using a machine to cut a piece of wood"], "captions_pred_audio": ["a dog barks and growls", "a large engine is running and a bell is ringing"], "question": "which entity is a still image?", "label": 0}, {"captions": ["a dog whimpers and a woman briefly talks", "dog barking and vehicle engine idling followed shortly by vehicle engine revving"], "sample_ids": ["y1saVTXsKwc", "zY3icUyMdh8"], "start_seconds": ["80", "20"], "properties": ["a, dog, talk", "dog, bark, engine"], "captions_pred_video": ["a dog playing with a pink ball", "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a dog barks and a man speaks", "a car is driving and dogs are barking and squealing "], "question": "which entity is more active", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a machine beeps continuously"], "sample_ids": ["vJvryTwuAV8", "y682ml90jGw"], "start_seconds": ["16", "11"], "properties": ["audience, cheer, man", "beeps, machine, continuously"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", null], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a duck quacks continuously"], "sample_ids": ["xZepNM9qcRA", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["background, motor, run", "quacks, continuously, duck"], "captions_pred_video": ["a close-up 
view of the motorcycle's engine and exhaust system", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "some men converse over an engine running"], "sample_ids": ["yRx9txMcBl0", "sCiy7QS1U"], "start_seconds": ["40", "300"], "properties": ["accelerates, tires, squeals", "men, converse, engine"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["bees buzz as wind blows", "a man talks as something metal hits against and glass is set down"], "sample_ids": ["tMJne1a4AFI", "x6ijhqRY38s"], "start_seconds": ["0", "250"], "properties": ["bees, buzz, wind", "something metal, glass, hit"], "captions_pred_video": ["a swarm of bees on the ground", "a chef preparing a dish with a bottle of wine and a plate of food on a table"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking and dishes are clanging "], "question": "which entity is not a video of something hitting something?", "label": 0}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "some tunes 
played by whistling"], "sample_ids": ["uRExseg-0XI", "u6BnG6YZqJ4"], "start_seconds": ["210", "0"], "properties": ["woman, man, water", "tune, play, whistling"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["someone whistles briefly", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["uFoga8sHpiw", "zl9Dqx-j7q4"], "start_seconds": ["90", "6"], "properties": ["sound, duration, pitch", "engine, laugh, loud"], "captions_pred_video": ["footage of a bird in a cage", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a person whistles a song", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a duck quacks continuously"], "sample_ids": ["sSMl2vc3ek", "vh30P49Po6s"], "start_seconds": ["20", "30"], "properties": ["a person, laughs, snores", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person snoring loudly", "a duck is quacking loudly"], "question": "which entity is a noise", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["yajyRTUQk3U", "y8WEcpOlT3I"], "start_seconds": ["400", "40"], "properties": ["a woman, something, fried", "harsh, wind, blows"], "captions_pred_video": ["- a woman cooking in the kitchen", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking with wind noise 
in the background "], "question": "which entity is about cooking?", "label": 0}, {"captions": ["an aircraft engine runs", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["yLCORCnd35Q", "yks4cLgIDMc"], "start_seconds": ["0", "170"], "properties": ["engine, aircraft, runs", "background, speaking, child"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["yZmhM1HcsyE", "tiDFTC-5vU"], "start_seconds": ["4", "30"], "properties": ["engine, roar, water", "male, duck, laugh"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", null], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "a man speaks followed by another man speaking outside"], "sample_ids": ["t69a8aRKhmc", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["a, b, c", "two men, speak, follow"], "captions_pred_video": ["footage is blurry and out of focus", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["a horse runs while two women talk", "small dogs yip and bark sharply"], 
"sample_ids": ["sdvI1mHAsc", "v-wcQf4BDY0"], "start_seconds": ["20", "120"], "properties": ["two women, horse, run", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "an infant crying as a woman laughs"], "sample_ids": ["yaln9y8I7ms", "xhmRY9yhC7c"], "start_seconds": ["230", "20"], "properties": ["female, flushes, toilet", "a, laugh, infant"], "captions_pred_video": ["footage is blurry and out of focus", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "roadway noise occurs and a truck accelerates"], "sample_ids": ["sDSppXIlJrs", "tgbONvsP47Y"], "start_seconds": ["27", "0"], "properties": ["microphone, water, wind", "noise, truck, accelerate"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "footage of a fire truck entering a garage"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a car is driving on the road "], "question": "which noise is made by a truck", "label": 1}, {"captions": ["a person is snoring while sleeping", "water drips and bubbles as a man speaks"], "sample_ids": ["vJrjSeP17yE", "vSeGhaZt-aI"], "start_seconds": ["40", "50"], "properties": ["a person is sleeping, snoring, person", "water, bubbles, speak"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a video", "label": 1}, 
{"captions": ["birds chirps while a siren signals in the distance", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["uKCSGgof8gI", "tdWhHV3X25Q"], "start_seconds": ["12", "60"], "properties": ["chirps, distance, signal", "applause, audience, yells"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "a motorcycle idles loudly as wind blows"], "sample_ids": ["sYITalLZjj4", "v7jJS8aAyA"], "start_seconds": ["30", "10"], "properties": ["water, rushes, background, birds", "wind, blows, loudly"], "captions_pred_video": ["two ducks are swimming in the water near each other", null], "captions_pred_audio": ["wind blows and birds chirp", "a motorcycle engine is idling and vibrating"], "question": "which entity is louder", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "an insect buzzes around continuously"], "sample_ids": ["v0x1odnXtP0", "v25l1jef3JY"], "start_seconds": ["210", "0"], "properties": ["keyboard, type, computer", "buzzes, continuously, insect"], "captions_pred_video": ["how to make money on youtube in spanish", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a person is typing on a keyboard", "a fly is buzzing around a microphone "], "question": "which entity is not a person?", "label": 1}, {"captions": ["some men converse over an engine running", "people speak as gunfire rings out"], "sample_ids": ["sCiy7QS1U", "wqTCwqVRDlk"], "start_seconds": ["300", "80"], "properties": ["men, converse, engine", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], 
"captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["birds fly and flutter around", "wind blows as people chatter quietly"], "sample_ids": ["wGKgwOP3h30", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["fly, flutter, around", "wind, chatter, people"], "captions_pred_video": ["of the pigeons in the coop", "footage is blurry and out of focus"], "captions_pred_audio": ["pigeons coo and flap their wings", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "some liquid flows while a woman laughs and man talks"], "sample_ids": ["vdoxuJn9lTc", "vddP56-ogds"], "start_seconds": ["40", "30"], "properties": ["burp, loud, girl", "liquid, laughs, man"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", null], "captions_pred_audio": ["a child speaks followed by a burp", "water is running and gurgling and a man is speaking"], "question": "which entity has a man talking?", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["y4tPJXBKDig", "ziUT9IFTkjg"], "start_seconds": ["20", "10"], "properties": ["a, noise, talk", "background, birds, rustling"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", null], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "birds are chirping and a chime is ringing "], "question": "which noise is made by a girl", "label": 0}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "people applaud and hoot and chat quietly"], "sample_ids": ["zkKdxzNC97Y", 
"wwyfGO2J4"], "start_seconds": ["27", "90"], "properties": ["loud, bang, noise", "people, applaud, hoot"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "a vehicle accelerates and squeals tires"], "sample_ids": ["t8CV69hcvF0", "yRx9txMcBl0"], "start_seconds": ["210", "40"], "properties": ["person, sneeze, follow", "accelerates, tires, squeals"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a woman sneezes and speaks", "a car is revving its engine and skidding "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["y8dSeubCNI", "wz7N8YRy74I"], "start_seconds": ["4", "30"], "properties": ["engine revving, people speaking, motorcycle", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["an engine revving and people talking in the background", "a man is speaking birds are chirping and a rooster is crowing "], 
"question": "which entity has a rooster?", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "an insect buzzes around continuously"], "sample_ids": ["smDKStoHBJo", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["a, talk, baby, cry", "buzzes, continuously, insect"], "captions_pred_video": ["a man holding a crying baby in his arms", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a fly is buzzing around a microphone "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a person sniffs and sneezes", "a woman and man speak while food is frying"], "sample_ids": ["uRlbY6aoBU", "zk-xJGQU8-4"], "start_seconds": ["0", "130"], "properties": ["sneezes, person, sniffs", "food, man, woman"], "captions_pred_video": [null, "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["a man is sneezing ", "a woman is speaking while dishes are clanging and music is playing in the background "], "question": "which entity is a group of people", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a frog croaks as other frogs croak in the background"], "sample_ids": ["vbZ-0lGPneg", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["a woman, a television program, a bird", "background, frog, croak"], "captions_pred_video": ["of a man holding a baby duck in his hands", "a close up of a frog in the water"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "people speak as gunfire rings out"], "sample_ids": ["u--KhUW8l1Y", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["horn, siren, life", "gunfire, ring, speak"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant 
at night", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["y4tPJXBKDig", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["a, noise, talk", "animal, grunts, snorts"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a woman is speaking and a baby is crying"], "question": "which entity is a person talking", "label": 0}, {"captions": ["birds chirp then an animal grunts", "some liquid flows while a woman laughs and man talks"], "sample_ids": ["tDlysoZiA1I", "vddP56-ogds"], "start_seconds": ["0", "30"], "properties": ["animal, grunt, chirp", "liquid, laughs, man"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", null], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "water is running and gurgling and a man is speaking"], "question": "which entity is about a woman and a man?", "label": 1}, {"captions": ["birds chirp as a train approaches", "a toilet flushes and water drains"], "sample_ids": ["xM4joTqDVp4", "sfAvvZwdLCY"], "start_seconds": ["160", "20"], "properties": ["bird, chirp, train", "water drains, flushes, water"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage of the toilet in the bathroom"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["someone is burping continuously", "three men talk while 
wind blows and some liquid flows"], "sample_ids": ["y636gklDioE", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["burps, burps, burps", "three men, wind, flow"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person burps loudly several times", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wy1eKjR7KC0", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["people, talk, distance", "music, gunfire, explosion"], "captions_pred_video": ["two police officers riding motorcycles down the street", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and a siren is going off", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "some tunes played by whistling"], "sample_ids": ["vb1fPSDI4c", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["multiple, people, yell", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["xjvTpk2Zpr8", "w5W5Kqtc8E"], "start_seconds": ["70", "100"], "properties": ["engine, run, wind", "wind, blow, vehicle"], "captions_pred_video": ["footage of a dhl plane 
landing on the runway", null], "captions_pred_audio": ["a jet engine roars and wind blows ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "pigeons vocalize and birds chirp"], "sample_ids": ["zFjIWfSD-4", "uiS58TNyUiw"], "start_seconds": ["410", "430"], "properties": ["People, motor, brakes", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a car accelerates and wind blows", "a duck quacks continuously"], "sample_ids": ["u0TrcHhkPQ", "vh30P49Po6s"], "start_seconds": ["20", "30"], "properties": ["accelerates, wind, blows", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vddP56-ogds", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["liquid, laughs, man", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["yRx9txMcBl0", "yDoT73BWsdA"], "start_seconds": ["40", "10"], "properties": ["motors, tires, screech", "engine, revs, vehicle"], "captions_pred_video": ["in 10 words or 
less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a race car accelerates and revs its engine "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["wind blows and a stream of water flows nearby", "vehicles pass by on a roadway"], "sample_ids": ["sYITalLZjj4", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["stream, flow, wind", "pass, vehicle, roadway"], "captions_pred_video": ["two ducks are swimming in the water near each other", "footage of a fire truck entering a garage"], "captions_pred_audio": ["wind blows and birds chirp", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "water rushes by"], "sample_ids": ["zuua6-5goWw", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["birds, chirp, quiet, man, speaks", "water, rushes, by"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a car is driving on a wet road "], "question": "which entity is moving faster", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "an adult woman speaks over chopping and silverware noises"], "sample_ids": ["vXlk0lIQBFo", "yYJksgsxx5U"], "start_seconds": ["470", "30"], "properties": ["wind, talk, vocalize", "audio, woman, silverware"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "of a woman slicing an orange on a cutting board"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a woman is speaking and dishes are clanging in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["yajyRTUQk3U", "uYT5gxnyMWM"], "start_seconds": ["400", "50"], "properties": ["a woman, something, fried", "a, scream, girl"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a woman is speaking and a baby is crying"], "question": "which entity is about a girl speaking?", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wRBHTgrbiwg", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["birds, chirp, cooing", "music, gunfire, explosion"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "in your 
own words a screenshot of the game's loading screen"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["someone is burping continuously", "a man speaks as a motor runs in the background"], "sample_ids": ["y636gklDioE", "xZepNM9qcRA"], "start_seconds": ["20", "30"], "properties": ["burps, burps, burps", "background, motor, run"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a person burps loudly several times", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "dishes cling together then a man begins to speak"], "sample_ids": ["uZesmtKZGSw", "sQGXqGcwOTc"], "start_seconds": ["250", "3"], "properties": ["men, talk, cars", "cling, speak, dishes"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["vehicles pass by on a roadway", "a car 
speeding up in the distance"], "sample_ids": ["tgbONvsP47Y", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["pass, vehicle, roadway", "distance, car, speed"], "captions_pred_video": ["footage of a fire truck entering a garage", null], "captions_pred_audio": ["a car is driving on the road ", "a race car accelerates and revs its engine "], "question": "which vehicle is moving faster", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "running water in a faucet with some clinks"], "sample_ids": ["vs65y4qmyBE", "zNRChLjqcU"], "start_seconds": ["340", "220"], "properties": ["wind, blows, strongly", "water, faucet, run"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "water is running from a faucet into a sink"], "question": "which entity is a liquid", "label": 1}, {"captions": ["someone whistles a tune", "a man speaks then multiple motorcycles pass by"], "sample_ids": ["sIXTftIuUgw", "zcDwZ6W7E3E"], "start_seconds": ["90", "180"], "properties": ["someone, tune, whistle", "a, man, speak"], "captions_pred_video": [null, "2 people riding motorcycles down a mountain road with trees lining the sides of the road"], "captions_pred_audio": ["a person whistling a song", "a man is speaking while a car accelerates and revs its engine "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a man speaks as a machine runs", "winds blows roughly as a vehicle races past"], "sample_ids": ["vD6lYD1l0BY", "xjvTpk2Zpr8"], "start_seconds": ["330", "70"], "properties": ["a, machine, run", "wind, blows, vehicle"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": 
["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "wind blows as people chatter quietly"], "sample_ids": ["zkKdxzNC97Y", "xBxDz0CFVn0"], "start_seconds": ["27", "30"], "properties": ["loud, bang, noise", "wind, chatter, people"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage is blurry and out of focus"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "water flows and trickles"], "sample_ids": ["vhJWZheqaE", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["water drains unevenly, toilet flushes, water drains", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a toilet is flushed", "water is splashing and gurgling"], "question": "which entity is a source of water", "label": 1}, {"captions": ["people clap and speak in the distance", "waves crash against a shoreline and people speak"], "sample_ids": ["wwyfGO2J4", "yFB25fqfU8I"], "start_seconds": ["90", "300"], "properties": ["clap, distance, speak", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be a video of a performance?", "label": 0}, {"captions": ["small dogs growl, bark and yip.", "small dogs yip and bark sharply"], "sample_ids": ["sShpyu2l4YQ", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["growl, bark, yip", "bark, yip, sharply"], "captions_pred_video": ["the puppies are playing with a toy", "footage is blurry and shaky, making it difficult to see what is 
happening"], "captions_pred_audio": ["a dog is barking and growling", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["t69a8aRKhmc", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["a, b, c", "men, talk, cars"], "captions_pred_video": ["footage is blurry and out of focus", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a woman and man speak while food is frying", "an airplane engine spools and people speak"], "sample_ids": ["zk-xJGQU8-4", "wTjoRj1se3U"], "start_seconds": ["130", "390"], "properties": ["food, man, woman", "airplane, engine, spool"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a jet engine is running and people are talking"], "question": "which entity is about a plane?", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "some men converse over an engine running"], 
"sample_ids": ["tDlfY3nmx1A", "sCiy7QS1U"], "start_seconds": ["160", "300"], "properties": ["applause, laugh, man", "men, converse, engine"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", null], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["an airplane engine runs", "birds chirp and pigeons vocalize while walking around"], "sample_ids": ["yVPZ2MNWpms", "wIvYjuR3nrg"], "start_seconds": ["0", "9"], "properties": ["engine, airplane, runs", "birds, pigeons, vocalize"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage of a pigeon sitting on a roof with trees in the background"], "captions_pred_audio": ["a car is driving by on the road ", "birds are chirping and cooing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["distant humming of an engine", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yVPZ2MNWpms", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["sound, distance, engine", "engine, laugh, loud"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a car is driving by on the road ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "water flows as men speak and yell"], "sample_ids": ["vYkA3cfXp5Q", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["speed, idle, accelerate", "water, flow, men"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["an engine is idling", "a man is speaking while the wind is blowing and water is splashing"], 
"question": "which entity is a video of a motor?", "label": 0}, {"captions": ["an airplane engine runs", "a infant makes noise and is excited"], "sample_ids": ["yVPZ2MNWpms", "wIJK3-5y0kA"], "start_seconds": ["0", "30"], "properties": ["engine, airplane, runs", "noise, excited, infant"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a car is driving by on the road ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["an airplane engine spools and people speak", "water flows and trickles"], "sample_ids": ["wTjoRj1se3U", "tB7hWb9gTuQ"], "start_seconds": ["390", "30"], "properties": ["airplane, engine, spool", "water, flow, trickle"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a jet engine is running and people are talking", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["white noise and birds chirping", "vehicles pass by on a roadway"], "sample_ids": ["wRBHTgrbiwg", "tgbONvsP47Y"], "start_seconds": ["50", "0"], "properties": ["noise, white, chirping", "pass, vehicle, roadway"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a car is driving on the road "], "question": "which is a moving object", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "people cheer as a vehicle engine revs"], "sample_ids": ["xfaoyyzw2WU", "xjhAnI2q6hM"], "start_seconds": ["180", "6"], "properties": ["loud, jet engine, roar", "engine revs, vehicle, people"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "a 
school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a truck is revving its engine and a man is speaking "], "question": "which is louder", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "a car speeding up in the distance"], "sample_ids": ["uOpoD0gGXcs", "u0TrcHhkPQ"], "start_seconds": ["120", "20"], "properties": ["chirps, woman, bird", "distance, car, speed"], "captions_pred_video": ["a herd of cows grazing in the field", null], "captions_pred_audio": ["birds are chirping and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "paper folding and crinkling"], "sample_ids": ["vveS8HT7Uog", "zPpG3RD8lSs"], "start_seconds": ["100", "20"], "properties": ["a man, objects, speak", "paper, fold, crinkle"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is filing and speaking with background 
noise and breathing ", "the wind blows and a mouse clicks "], "question": "which object is being folded and crinkled", "label": 0}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "small dogs yip and bark sharply"], "sample_ids": ["yYEVLuqEytU", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["animal, pig, background", "bark, yip, sharply"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["electronic beeps occur in a short series", "a stream of water runs briefly"], "sample_ids": ["y682ml90jGw", "x-PeY8Yb8M4"], "start_seconds": ["11", "300"], "properties": ["beeps, series, electronic", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a beeping sound is being made ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vqZuVbG6-HI", "wqZ135Ssz0"], "start_seconds": ["130", "60"], "properties": ["background, male, female", "two men, woman, birds"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a mechanical buzzing getting louder", "a duck quacks loudly and continuously"], "sample_ids": ["sEprKHm8Sj8", "vh30P49Po6s"], "start_seconds": ["90", "30"], "properties": 
["noise, loud, buzzing", "loud, continuous, quacks"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a child speaks in closed space"], "sample_ids": ["u21-Z5gJCB8", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["background, voice, man", "child, space, speak"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "water is sprayed across a hard surface"], "sample_ids": ["w5W5Kqtc8E", "sQwlkXjQabo"], "start_seconds": ["100", "10"], "properties": ["wind, blow, vehicle", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "winds blows roughly as a vehicle races past"], "sample_ids": ["sxYkFKFIZD0", "xjvTpk2Zpr8"], "start_seconds": ["20", "70"], "properties": ["screech, man, door", "wind, blows, vehicle"], "captions_pred_video": ["2005 audi a4 
1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a jet engine roars and wind blows "], "question": "which entity is about a vehicle racing past?", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "an insect buzzes around continuously"], "sample_ids": ["vveS8HT7Uog", "v25l1jef3JY"], "start_seconds": ["100", "0"], "properties": ["a man, objects, speak", "buzzes, continuously, insect"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "male speech with light ticking"], "sample_ids": ["vKrYfzleLB8", "xO-Q2BlIIPU"], "start_seconds": ["110", "30"], "properties": ["a, ring, gunshots", "male, speech, ticking"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "a clock with a green glowing display showing the time 09 07 2016 12 31 2016"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "small dogs growl, bark and yip."], "sample_ids": ["zofjfKhqLk8", 
"sShpyu2l4YQ"], "start_seconds": ["10", "0"], "properties": ["background, metal, clings", "growl, bark, yip"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "the puppies are playing with a toy"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a dog is barking and growling"], "question": "which entity is more active", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a stream of water runs briefly"], "sample_ids": ["vJvryTwuAV8", "x-PeY8Yb8M4"], "start_seconds": ["16", "300"], "properties": ["audience, cheer, man", "stream, water, run"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["un9VQlzgZM", "zj2R0XoFr5k"], "start_seconds": ["5", "50"], "properties": ["wind, speak, laugh", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about flying", "label": 1}, {"captions": ["someone snores nearby", "some men converse over an engine running"], "sample_ids": ["spJCm8tD9Zo", "sCiy7QS1U"], "start_seconds": ["90", "300"], "properties": ["someone snores, nearby, someone", "men, converse, engine"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a man 
speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "two women and a man talk while a kid cries"], "sample_ids": ["u6jIvCtKarQ", "wyllXV6PjKo"], "start_seconds": ["70", "30"], "properties": ["a, man, speaks", "a kid, talk, cry"], "captions_pred_video": ["footage of a person using a blender on a stove top", null], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a woman speaks and a baby cries"], "question": "which entity has a kid crying?", "label": 1}, {"captions": ["a clock ticktocks in wind", "a heavy rain falls endlessly"], "sample_ids": ["yVumC9TGknc", "wP8ZKrlx3oA"], "start_seconds": ["30", "40"], "properties": ["ticktocks, clock, wind", "heavy, rain, fall"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a series of beeps and chirps", "a heavy rain is falling on a surface"], "question": "which entity is falling", "label": 1}, {"captions": ["water rushes and then a vehicle zooms past", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["s4Uz1Ffgo04", "ziUT9IFTkjg"], "start_seconds": ["100", "10"], "properties": ["water, rushes, vehicle", "background, birds, rustling"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["motors runs briefly and tires screech", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["yRx9txMcBl0", "xKB8O8LTs6s"], "start_seconds": ["40", "70"], "properties": ["motors, tires, 
screech", "music, gunfire, explosion"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a car is revving its engine and skidding ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a man is filing a hard object", "water flows and trickles"], "sample_ids": ["vveS8HT7Uog", "tB7hWb9gTuQ"], "start_seconds": ["100", "30"], "properties": ["a man, hard, object", "water, flow, trickle"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "water is splashing and gurgling"], "question": "which entity is flowing", "label": 1}, {"captions": ["an engine runs and a man speaks", "a man speaks as a car is passing by"], "sample_ids": ["yT5WfYMRr-U", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["engine, run, man", "a, car, pass"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a toilet flushes and water drains", "running water in a faucet with some clinks"], "sample_ids": ["sfAvvZwdLCY", "zNRChLjqcU"], "start_seconds": ["20", "220"], "properties": ["water drains, flushes, water", "water, faucet, run"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "water is running from a faucet into a sink"], "question": "which entity has water running through it?", "label": 1}, {"captions": ["bees buzz as wind blows", "a toilet flushes and a female speaks"], "sample_ids": ["tMJne1a4AFI", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["bees, buzz, wind", "female, flushes, toilet"], "captions_pred_video": ["a swarm of bees on the ground", "footage is blurry and out of focus"], "captions_pred_audio": ["a swarm of bees buzzing around", "a toilet flushes and a man speaks"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a clock ticktocks briefly", "a stream of water runs briefly"], "sample_ids": ["u7C-AEBQM", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["ticktocks, clock, ticktocks briefly", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a ticktock of a clock", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "an insect buzzes around continuously"], "sample_ids": ["y2bVZ7rz-5M", 
"v25l1jef3JY"], "start_seconds": ["280", "0"], "properties": ["engine, horn, siren", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a fly is buzzing around a microphone "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a person sniffles and sneezes", "three men talk while wind blows and some liquid flows"], "sample_ids": ["uRlbY6aoBU", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["sneezes, sniffles, person", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a person", "label": 0}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "a stream of water runs briefly"], "sample_ids": ["sapQIQUhFc", "x-PeY8Yb8M4"], "start_seconds": ["280", "300"], "properties": ["liquid, flow, distance", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tQWGZLItBXk", "tdWhHV3X25Q"], "start_seconds": ["170", "60"], "properties": ["voice, music, whoosh", "applause, audience, yells"], "captions_pred_video": ["worms revolution screenshots", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a child speaks music plays video game 
sounds sound effects and sound effects play ", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zkKdxzNC97Y", "xKB8O8LTs6s"], "start_seconds": ["27", "70"], "properties": ["hard, surface, door", "music, gunfire, explosion"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a door is opened and closed", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["yaln9y8I7ms", "xjhAnI2q6hM"], "start_seconds": ["230", "6"], "properties": ["female, flushes, toilet", "engine revs, vehicle, people"], "captions_pred_video": ["footage is blurry and out of focus", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a truck is revving its engine and a man is speaking "], "question": "which entity has a vehicle?", "label": 1}, {"captions": ["a motorcycle engine is idling", "winds blows roughly as a vehicle races past"], "sample_ids": ["vZAqdHZ81yA", "xjvTpk2Zpr8"], "start_seconds": ["180", "70"], "properties": ["engine, motorcycle, idling", "wind, blows, vehicle"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["an engine is idling loudly", "a jet engine roars and wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "a dark barks and whimpers"], "sample_ids": ["sncRqQ67iJU", "sYj4hpDUZDQ"], 
"start_seconds": ["460", "30"], "properties": ["loud, repeatedly, man", "barks, whimpers, dark"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "a brown and white dog standing in front of a wall with its mouth open"], "captions_pred_audio": ["a person is snoring", "a dog barks and a cat meows"], "question": "which entity is a dog", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a person snores loudly multiple times at a close distance"], "sample_ids": ["u7C-AEBQM", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["ticks, rhythmic, quiet", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a ticktock of a clock", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as horns blow", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["tHyNqRyK34A", "sLUnaPT5gM8"], "start_seconds": ["24", "0"], "properties": ["a, man, speaks", "loud, laughter, intermittent"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "after a few seconds of silence, a loud bang occurs followed by a softer banging noise"], "sample_ids": ["zgUgkpk78xU", "zkKdxzNC97Y"], "start_seconds": ["70", "27"], "properties": ["clinking, humming, horn", "loud, bang, noise"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 
60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a door is opened and closed"], "question": "which entity is softer", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a vehicle accelerates and squeals tires"], "sample_ids": ["uZesmtKZGSw", "yRx9txMcBl0"], "start_seconds": ["250", "40"], "properties": ["men, talk, cars", "accelerates, tires, squeals"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a car is revving its engine and skidding "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "pigeons vocalize and birds chirp"], 
"sample_ids": ["vimzuGQvdcU", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["a, man, yells", "vocalize, bird, chirp"], "captions_pred_video": ["a group of people are rafting down a river", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a child speaks", "an infant crying frantically"], "sample_ids": ["yW6FWLSLkx4", "zwOBqeFTgiU"], "start_seconds": ["40", "30"], "properties": ["a, child, speaks", "cry, infant, frantically"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of the baby crying in the car seat"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a baby cries loudly"], "question": "which entity is a child", "label": 0}, {"captions": ["frogs croak and vocalize", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yswmmRZFItk", "sSMl2vc3ek"], "start_seconds": ["0", "20"], "properties": ["croak, vocalize, frog", "loud, multiple, distance"], "captions_pred_video": ["a close up of a frog in the water", null], "captions_pred_audio": ["a frog is croaking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["water flows as men speak and yell", "people speak as gunfire rings out"], "sample_ids": ["vJ7JPEFhyLA", "wqTCwqVRDlk"], "start_seconds": ["16", "80"], "properties": ["water, flow, men", "gunfire, ring, speak"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", 
"people cheer as a vehicle engine revs"], "sample_ids": ["zALy31PjDl0", "xjhAnI2q6hM"], "start_seconds": ["21", "6"], "properties": ["a man, a vehicle, a horn", "engine revs, vehicle, people"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a truck is revving its engine and a man is speaking "], "question": "which entity has more people", "label": 1}, {"captions": ["someone is snoring while sleeping", "people cheer as a vehicle engine revs"], "sample_ids": ["ujMt0-D-x2k", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["snore, sleep, someone", "engine revs, vehicle, people"], "captions_pred_video": ["of the dog playing with a toy on the floor", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a person is snoring loudly", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a telephone rings followed by a woman talking"], "sample_ids": ["wy1eKjR7KC0", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["people, talk, distance", "ring, talk, woman"], "captions_pred_video": ["two police officers riding motorcycles down the street", null], "captions_pred_audio": ["a man is speaking and a siren is going off", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "a woman speaks happily and an animal chirps"], "sample_ids": ["uJV8NDaHqqk", "uWAAAL4CIoc"], "start_seconds": ["100", "0"], "properties": ["loud, fly, chirp", "a woman, chirps, animal"], "captions_pred_video": ["a bee hive in a wooden box", null], "captions_pred_audio": ["a swarm of bees buzzing 
around", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirp and wind blows", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sxIvBMSavMQ", "wqZ135Ssz0"], "start_seconds": ["210", "60"], "properties": ["birds, chirp, wind", "two men, woman, birds"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more birds", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sapQIQUhFc", "uZesmtKZGSw"], "start_seconds": ["280", "250"], "properties": ["water, trickles, flow", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a woman speaks and food sizzles while frying", "wind blowing followed by a zoom"], "sample_ids": ["wTideSjRFS0", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["food, sizzle, woman", "wind, blow, zoom"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a zoom?", "label": 0}, {"captions": ["a woman speaks and a baby laughs", "small dogs yip and bark sharply"], "sample_ids": ["tOj4tdLRaA", "v-wcQf4BDY0"], "start_seconds": ["70", "120"], "properties": ["woman, laugh, baby", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["someone sprays liquid onto a hard surface", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sQwlkXjQabo", "wqZ135Ssz0"], "start_seconds": ["10", "60"], "properties": ["liquid, surface, spray", "two men, woman, birds"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", null], "captions_pred_audio": ["spraying followed by silence", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "a 
propeller rotates loudly and intensely"], "sample_ids": ["spJCm8tD9Zo", "ugHJF0hfYkg"], "start_seconds": ["90", "10"], "properties": ["snores, wheezes, sleeps", "loud, intense, propeller"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a person is snoring loudly", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["someone snores nearby", "females talk and laugh over gusting wind"], "sample_ids": ["spJCm8tD9Zo", "un9VQlzgZM"], "start_seconds": ["90", "5"], "properties": ["someone snores, nearby, someone", "females, talk, laugh"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["people speak as gunfire rings out", "birds chirp and objects are moved around"], "sample_ids": ["wqTCwqVRDlk", "yPUYU6t3rwo"], "start_seconds": ["80", "370"], "properties": ["gunfire, ring, speak", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and a gun is fired", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a helicopter engine idles continuously", "birds chirp and pigeons vocalize while walking around"], "sample_ids": ["ugHJF0hfYkg", "wIvYjuR3nrg"], "start_seconds": ["10", "9"], "properties": ["engine, idle, continuously", "birds, pigeons, vocalize"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a pigeon sitting on a roof with trees in the background"], "captions_pred_audio": ["a helicopter is flying 
overhead ", "birds are chirping and cooing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a helicopter engine runs"], "sample_ids": ["y2bVZ7rz-5M", "t5ZbXbniOWk"], "start_seconds": ["280", "30"], "properties": ["motor noise, horn, siren", "engine, helicopter, run"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a helicopter is flying overhead "], "question": "which entity is a helicopter?", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zl9Dqx-j7q4", "vfYTJq7nU"], "start_seconds": ["6", "130"], "properties": ["engine, laugh, loud", "rustling, ducks, quack"], "captions_pred_video": ["footage of a man driving a car in the dark", null], "captions_pred_audio": ["a jet engine roars ", "a duck quacks and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "water flows as men speak and yell"], "sample_ids": ["sU53zg9Jp7s", "vJ7JPEFhyLA"], "start_seconds": ["380", "16"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "water, flow, men"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["paper is repeatedly crumpled and 
crinkled", "a man speaks as a car is passing by"], "sample_ids": ["vms5XGTDVQc", "sK4u5T8hW78"], "start_seconds": ["220", "30"], "properties": ["paper, crumpled, crinkled", "a, car, pass"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["paper is crumpled and crinkled", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "water splashes and a door squeaks"], "sample_ids": ["w2bYrCVLT60", "sdXV-ylviw"], "start_seconds": ["120", "190"], "properties": ["ducks, speak, quack", "sound, splash, door"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", null], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a dog barks and taps with background noise "], "question": "which entity is silent", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zY3icUyMdh8", "wz7N8YRy74I"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "rooster, crow, background, men"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is more social", "label": 
1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "wind blowing followed by a zoom"], "sample_ids": ["sofxkNWaP0s", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["wind, engine, louder", "wind, blow, zoom"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom?", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "a man speaks as a motor runs in the background"], "sample_ids": ["s59PfAghdkM", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["bird, chirp, background, horse, neigh", "background, motor, run"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "an airplane engine spools and people speak"], "sample_ids": ["un9VQlzgZM", "wTjoRj1se3U"], "start_seconds": ["5", "390"], "properties": ["females, talk, laugh", "airplane, engine, spool"], "captions_pred_video": [null, "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a jet engine is running and people are talking"], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "a stream of water 
flows as people talk and wind blows"], "sample_ids": ["sQwlkXjQabo", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["liquid, surface, spray", "stream, water, flow"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage is blurry and out of focus"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking with wind noise in the background "], "question": "which entity is flowing", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["xSKJGCItUWE", "ukg5L09Wpvo"], "start_seconds": ["10", "150"], "properties": ["engine, run, boy", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["multiple people speak while a television play and a child screams", "a diesel truck engine runs steadily"], "sample_ids": ["yks4cLgIDMc", "sZvwOuuPGP0"], "start_seconds": ["170", "50"], "properties": ["multiple people, television, child", "engine, diesel, truck"], "captions_pred_video": ["footage of two kids wrestling on the floor", "of a bulldozer clearing a road in a forest stock footage and royalty-free videos"], "captions_pred_audio": ["a man is speaking and a child is crying", "a medium engine is running "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["an insect buzzes around continuously", "a toilet flushes and water drains unevenly"], "sample_ids": ["v25l1jef3JY", "vhJWZheqaE"], "start_seconds": ["0", "0"], "properties": ["buzzes, continuously, insect", "water drains unevenly, toilet flushes, water drains"], "captions_pred_video": ["a black 
background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a toilet is flushed"], "question": "which entity is not a living thing", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "people cheer as a vehicle engine revs"], "sample_ids": ["ziUT9IFTkjg", "xjhAnI2q6hM"], "start_seconds": ["10", "6"], "properties": ["background, birds, rustling", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a truck is revving its engine and a man is speaking "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sZvwOuuPGP0", "zj2R0XoFr5k"], "start_seconds": ["50", "50"], "properties": ["engine, diesel, truck", "airplane, boy, fly"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a medium engine is running ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a stream of water runs briefly", "a woman speaks as she rubs two objects together"], "sample_ids": ["x-PeY8Yb8M4", "vzxHnu-SFEw"], "start_seconds": ["300", "80"], "properties": ["stream, water, run", "two objects, woman, speak"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a car is driving on a wet road ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a physical action", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["x5cuQjOdM3E", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["cat, talk, meow", "loud, jet engine, roar"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a cat meows and a woman speaks", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "small dogs yip and bark sharply"], "sample_ids": ["vZAw4apG0Es", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["background, clock, ticktocks", "bark, yip, sharply"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a clock is ticking and people are talking", "a dog barks and growls"], "question": "which entity is more 
active", "label": 1}, {"captions": ["a person snoring", "birds chirp and objects are moved around"], "sample_ids": ["t8tv5YRMJUg", "yPUYU6t3rwo"], "start_seconds": ["0", "370"], "properties": ["a person, snore, loud", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a man getting his face licked by another man", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a person sniffs and breathes heavily", "insects buzz and a man speaks"], "question": "which entity is not a person?", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "wind blows as people chatter quietly"], "sample_ids": ["uKCSGgof8gI", "xBxDz0CFVn0"], "start_seconds": ["12", "30"], "properties": ["chirps, distance, signal", "wind, chatter, people"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "footage is blurry and out of focus"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a dog barks and whimpers", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["sShpyu2l4YQ", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["barks, whimpers, dog", "a woman, laughs, animal"], "captions_pred_video": ["the puppies are playing with a toy", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking and a baby is crying"], "question": "which entity is more playful", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xfudFO976zE", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["animal, bleats, cry", "engine, laugh, loud"], "captions_pred_video": ["footage is blurry and shaky", "footage of a man driving a car in the 
dark"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["dogs barking and whimpering", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["tIY7qOV3rEM", "wDVMhEdTiVw"], "start_seconds": ["0", "30"], "properties": ["barking, whimpering, dog", "gun, shoot, water"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more violent", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "pigeons vocalize and birds chirp"], "sample_ids": ["ukxt9I7eMMg", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["continuous, woman, speaking", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of the pigeon in the cage"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "small dogs yip and bark sharply"], "sample_ids": ["vYkA3cfXp5Q", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["speed, idle, accelerate", "bark, yip, sharply"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["an engine is idling", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "television program is played far away while a woman talks and birds tweet nearby"], 
"sample_ids": ["yLy-WycbVVE", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["background, people, talk", "a woman, a television program, a bird"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program playing in the background?", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["tDVADusiIoc", "tDlysoZiA1I"], "start_seconds": ["60", "0"], "properties": ["wind, radio, waves", "animal, grunts, chirps"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "paper is crumpling consistently"], "sample_ids": ["zhx6hoYrHeI", "v5cSxLaHADY"], "start_seconds": ["160", "0"], "properties": ["engine, sputter, rough", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a car accelerates and revs its engine ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["an insect buzzes around continuously", "winds blows roughly as a vehicle races past"], "sample_ids": ["v25l1jef3JY", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["buzzes, continuously, insect", "wind, blows, vehicle"], 
"captions_pred_video": ["a black background with a cartoon character in the foreground", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["water flows followed by women screaming", "a woman speaks happily and an animal chirps"], "sample_ids": ["w5W5Kqtc8E", "uWAAAL4CIoc"], "start_seconds": ["100", "0"], "properties": ["water, flow, women", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking and a dog is barking "], "question": "which entity is more calm", "label": 1}, {"captions": ["an infant crying frantically", "people speak as gunfire rings out"], "sample_ids": ["zwOBqeFTgiU", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["cry, infant, frantically", "gunfire, ring, speak"], "captions_pred_video": ["of the baby crying in the car seat", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a baby cries loudly", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war zone", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "people speak as gunfire rings out"], "sample_ids": ["sHbXC6na9hg", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["a person, saw, wood", "gunfire, ring, speak"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "an infant crying frantically"], "sample_ids": ["zofjfKhqLk8", "zwOBqeFTgiU"], "start_seconds": ["10", "30"], 
"properties": ["noise, stop, motor", "cry, infant, frantically"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of the baby crying in the car seat"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a baby cries loudly"], "question": "which entity is a human", "label": 1}, {"captions": ["a stream of water flows quickly", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["wbHTKEJZyhc", "uZesmtKZGSw"], "start_seconds": ["20", "250"], "properties": ["stream, water, flow", "men, talk, cars"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is moving faster", "label": 1}, {"captions": ["goats bleat and people speak", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["z5iUE5h0EPs", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["goats bleat, people speak, language", "rooster, crow, background, men"], "captions_pred_video": ["of the goat in the barn", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a goat bleats and a man speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a rooster?", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "a machine beeps continuously"], "sample_ids": ["vbpKkWvfOu4", "y682ml90jGw"], "start_seconds": ["560", "11"], "properties": ["a, man, speaks", "beeps, machine, continuously"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a person is snoring while sleeping", "people applaud and hoot and chat quietly"], "sample_ids": ["vJrjSeP17yE", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["a person is sleeping, snoring, person", "people, applaud, 
hoot"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a helicopter engine idles continuously", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["ugHJF0hfYkg", "wqZ135Ssz0"], "start_seconds": ["10", "60"], "properties": ["engine, idle, continuously", "two men, woman, birds"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a human activity", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "a man woman speak while crickets sing"], "sample_ids": ["zj2R0XoFr5k", "zTLVJCo4WEE"], "start_seconds": ["50", "30"], "properties": ["airplane, fly, woman", "a, crickets, sing"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "- a boy with a rifle aiming at a target"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman speaks and crickets chirp"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["dogs bark as an engine runs and a person whistles", "water pouring and bubbling"], "sample_ids": ["zY3icUyMdh8", "uyRfq-jKPpo"], "start_seconds": ["20", "50"], "properties": ["dog, bark, engine", "water, bubbles, pouring"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["some people speak", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["vbZ-0lGPneg", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "beeps, hit, woman"], "captions_pred_video": ["of a man holding a baby duck in his hands", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "someone is typing on a computer keyboard"], "sample_ids": ["t8CV69hcvF0", "v0x1odnXtP0"], "start_seconds": ["210", "210"], "properties": ["person, sneeze, follow", "keyboard, type, computer"], "captions_pred_video": ["of an airplane flying in the dark sky at 
night", "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman sneezes and speaks", "a person is typing on a keyboard"], "question": "which person is typing on a computer keyboard", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "a vehicle engine accelerating then running on idle"], "sample_ids": ["yJ0TePmaOo", "vYkA3cfXp5Q"], "start_seconds": ["390", "30"], "properties": ["two hard objects, man, speak", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "an engine is idling"], "question": "which is a vehicle", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a infant makes noise and is excited"], "sample_ids": ["zuua6-5goWw", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["birds, chirp, quiet, man, speaks", "noise, excited, infant"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["two women and a man talk while a kid cries", "a steam engine runs and whistles as it passes by"], "sample_ids": ["wyllXV6PjKo", 
"se87d6yxEOA"], "start_seconds": ["30", "10"], "properties": ["a kid, talk, cry", "run, whistle, pass"], "captions_pred_video": [null, "footage of a train passing by a train station with smoke billowing out of the train's smokestack"], "captions_pred_audio": ["a woman speaks and a baby cries", "a train is moving and blowing its whistle "], "question": "which entity is moving", "label": 1}, {"captions": ["paper folding and crinkling", "a woman speaks as frying food sizzles"], "sample_ids": ["zPpG3RD8lSs", "wTideSjRFS0"], "start_seconds": ["20", "30"], "properties": ["paper, fold, crinkle", "food, sizzle, woman"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a woman is speaking while water is running in the background"], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "an infant crying frantically"], "sample_ids": ["xO-Q2BlIIPU", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["two men, exclamation, speak", "cry, infant, frantically"], 
"captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["uiS58TNyUiw", "uEU-Hg5MTN8"], "start_seconds": ["430", "27"], "properties": ["vocalize, bird, chirp", "a woman, laughs, animal"], "captions_pred_video": ["of the pigeon in the cage", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a woman is speaking and a baby is crying"], "question": "which entity is a human", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["uEU-Hg5MTN8", "yDoT73BWsdA"], "start_seconds": ["27", "10"], "properties": ["a woman, laughs, animal", "engine, revs, vehicle"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["xC8kbrKJmco", "wDVMhEdTiVw"], "start_seconds": ["0", "30"], "properties": ["background, goat, scream", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a goat is bleating ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["multiple ducks quack continuously", "an adult 
woman and an adult man speak"], "sample_ids": ["wfHeoPDLMaM", "zTLVJCo4WEE"], "start_seconds": ["30", "30"], "properties": ["multiple, quack, continuously", "two people, adult, speak"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "- a boy with a rifle aiming at a target"], "captions_pred_audio": ["ducks are quacking", "a woman speaks and crickets chirp"], "question": "which entity is speaking", "label": 1}, {"captions": ["a door opens and closes", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vBHyYJ8pL0", "uEU-Hg5MTN8"], "start_seconds": ["2", "27"], "properties": ["open, close, door", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a door", "label": 0}, {"captions": ["water rushes by", "a man speaks as water trickles down a stream"], "sample_ids": ["x-PeY8Yb8M4", "sapQIQUhFc"], "start_seconds": ["300", "280"], "properties": ["water, rushes, by", "water, stream, trickles"], 
"captions_pred_video": ["a man sitting on a rock in the middle of a river", null], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking and a stream is flowing in the background "], "question": "which entity is moving more slowly", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "an audience gives applause"], "sample_ids": ["xZepNM9qcRA", "x6iCUDmRpKQ"], "start_seconds": ["30", "38"], "properties": ["background, motor, run", "applause, audience, give"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "a black background with the moon and stars in the sky"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a group of people are clapping and cheering"], "question": "which is a more active scene", "label": 1}, {"captions": ["a power tool runs and touches a surface", "some people speak"], "sample_ids": ["zfvPRf3chY", "vbZ-0lGPneg"], "start_seconds": ["290", "30"], "properties": ["power tool, run, touch", "some people speak English, some people speak Spanish, some people speak French"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a woman is speaking and a dog is whimpering"], "question": "which entity is not a power tool", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a person screams glaringly"], "sample_ids": ["vSeGhaZt-aI", "xC8kbrKJmco"], "start_seconds": ["50", "0"], "properties": ["water, sink, talk", "glaringly, screams, person"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a goat is bleating "], "question": "which entity is more likely to be a person", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "paper is crumpling 
consistently"], "sample_ids": ["sQGXqGcwOTc", "v5cSxLaHADY"], "start_seconds": ["3", "0"], "properties": ["cling, speak, dishes", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vJ7JPEFhyLA", "yDoT73BWsdA"], "start_seconds": ["16", "10"], "properties": ["three men, wind, flow", "engine, revs, vehicle"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "a car speeds away loudly followed by a car revving loudly and driving away while outside"], "sample_ids": ["ukg5L09Wpvo", "sjlVMgdGSK0"], "start_seconds": ["150", "30"], "properties": ["a train, a horn, a bell", "car, revving, loudly"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a car accelerates and revs its engine "], "question": "which entity is revving loudly", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "pigeons vocalize and birds chirp"], "sample_ids": ["y2bVZ7rz-5M", "uiS58TNyUiw"], "start_seconds": ["280", "430"], "properties": ["motor noise, horn, siren", "vocalize, bird, chirp"], 
"captions_pred_video": ["footage of a parade of fire trucks driving down the street", "of the pigeon in the cage"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "soft movement is accompanied by clocks ticking in the background"], "sample_ids": ["sQGXqGcwOTc", "vlJS7LN2XyM"], "start_seconds": ["3", "30"], "properties": ["cling, speak, dishes", "background, clocks, ticking"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a ticktock of a clock"], "question": "which entity is more quiet", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["tIY7qOV3rEM", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "a woman, laughs, animal"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["uZesmtKZGSw", "tdWhHV3X25Q"], "start_seconds": ["250", "60"], "properties": ["men, talk, cars", "applause, audience, yells"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro 
gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "people applaud and hoot and chat quietly"], "sample_ids": ["smGI3C1NZc", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["water, drain, toilet", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is flushed", "people are clapping and speaking with background noise "], "question": "which entity is a performance", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vcmWSmvti8", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["music, man, fire", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a duck quacks and a woman speaks"], "question": "which entity is about hunting?", "label": 1}, {"captions": ["multiple insects buzz over rustling wind", "a toilet flushes and a female speaks"], "sample_ids": ["tMJne1a4AFI", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["wind, buzz, rustling", "female, flushes, toilet"], "captions_pred_video": ["a swarm of bees on the 
ground", "footage is blurry and out of focus"], "captions_pred_audio": ["a swarm of bees buzzing around", "a toilet flushes and a man speaks"], "question": "which entity is not a toilet?", "label": 0}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a man speaks followed by another man speaking outside"], "sample_ids": ["ul60S8TXDA8", "viuTg1M-dqg"], "start_seconds": ["60", "30"], "properties": ["sound, distance, bell", "two men, speak, follow"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker?", "label": 1}, {"captions": ["a dog barks and whimpers", "a man speaks as a car is passing by"], "sample_ids": ["sShpyu2l4YQ", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "a, car, pass"], "captions_pred_video": ["the puppies are playing with a toy", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a person is whistling a tune", "a toilet flushes and water drains"], "sample_ids": ["scYRUkrFLiQ", "sfAvvZwdLCY"], "start_seconds": ["30", "20"], "properties": ["a, tune, whistle", "water drains, flushes, water"], "captions_pred_video": ["of the man wearing a bow tie and a 
suit jacket in front of a red door", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a person whistling a song", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a man speaks as a car is passing by"], "sample_ids": ["vZAw4apG0Es", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["background, clock, ticktocks", "a, car, pass"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a clock ticktocking in the background", "label": 0}, {"captions": ["there are rhythmical snoring nearby", "an engine runs loudly"], "sample_ids": ["ujMt0-D-x2k", "vqZuVbG6-HI"], "start_seconds": ["0", "130"], "properties": ["snoring, rhythmical, nearby", "loud, engine, run"], "captions_pred_video": ["of the dog playing with a toy on the floor", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a person is snoring loudly", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["food is frying and sizzles", "a man speaks as a car is passing by"], "sample_ids": ["zNRChLjqcU", "sK4u5T8hW78"], "start_seconds": ["220", "30"], "properties": ["food is frying, sizzles, food", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking with background noise and breathing sounds "], "question": "which entity is not a person?", "label": 0}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "three men talk while wind blows and some liquid flows"], "sample_ids": ["smDKStoHBJo", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["a, infant, speaking", "three men, wind, flow"], "captions_pred_video": ["a man holding a crying baby in his arms", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a child yells and another yells", "people cheer as a vehicle engine revs"], "sample_ids": ["vMDHu7Lxcgw", "xjhAnI2q6hM"], "start_seconds": ["410", "6"], "properties": ["two, yell, child", "engine revs, vehicle, people"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a truck is revving its engine and a man is speaking "], "question": "which entity has more people", "label": 1}, {"captions": ["a horse runs while two women talk", "a clock ticktocks"], "sample_ids": ["sdvI1mHAsc", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["two women, horse, run", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a 
cuckoo clock hanging on the wall"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "vehicles pass by on a roadway"], "sample_ids": ["xV7Mg1QucSc", "tgbONvsP47Y"], "start_seconds": ["14", "0"], "properties": ["alarm, ticktocks, laughs", "pass, vehicle, roadway"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "footage of a fire truck entering a garage"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a car is driving on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a car speeding up in the distance"], "sample_ids": ["tDlysoZiA1I", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["animal, grunt, chirp", "distance, car, speed"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", null], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["two frogs croak at each other", "vehicles pass by on a roadway"], "sample_ids": ["zg0X6BnhOLQ", "tgbONvsP47Y"], "start_seconds": ["410", "0"], "properties": ["two frogs, croak, at each other", "pass, vehicle, roadway"], "captions_pred_video": ["footage of lightning in the sky at night", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a frog is croaking", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["someone whistles a tune", "a man is filing a hard object"], "sample_ids": ["sIXTftIuUgw", "vveS8HT7Uog"], "start_seconds": ["90", "100"], "properties": ["someone, tune, whistle", "a man, hard, object"], "captions_pred_video": [null, "footage is of a workbench with various tools on it including a hammer and a 
screwdriver"], "captions_pred_audio": ["a person whistling a song", "a man is filing and speaking with background noise and breathing "], "question": "which action is more passive", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["zofjfKhqLk8", "y8WEcpOlT3I"], "start_seconds": ["10", "40"], "properties": ["background, metal, clings", "harsh, wind, blows"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking with wind noise in the background "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a train horn blows as it passes by"], "sample_ids": ["w5W5Kqtc8E", "zVacuqSb4LI"], "start_seconds": ["100", "30"], "properties": ["water, splashes, motorboat", "horn, blows, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a train whistle blows and a train passes by with a whistle blowing "], "question": 
"which entity is a train?", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zofjfKhqLk8", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["noise, stop, motor", "female, spraying, scream"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tiDFTC-5vU", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["male, duck, laugh", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a door opens and birds chirp", "a woman speaks with water running"], "sample_ids": ["yeFvk9x0wWI", "wTideSjRFS0"], "start_seconds": ["30", "30"], "properties": ["door, open, birds", "water, running, woman"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a woman is speaking while water is running in the background"], "question": "which entity is a video of a door opening and birds chirping?", "label": 0}, {"captions": ["some liquid flows while a woman laughs and man talks", "a clock ticks quietly and rhythmically"], "sample_ids": ["vddP56-ogds", "u7C-AEBQM"], "start_seconds": ["30", "30"], "properties": ["liquid, laughs, man", "ticks, rhythmic, quiet"], 
"captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a ticktock of a clock"], "question": "which entity is quieter", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "water flows as men speak and yell"], "sample_ids": ["s3cTDAj31g", "vJ7JPEFhyLA"], "start_seconds": ["80", "16"], "properties": ["man, talk, woman", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people", "label": 1}, {"captions": ["water flows as men speak and yell", "an engine starts and increases in power"], "sample_ids": ["vJ7JPEFhyLA", "zjTG0gaGCUI"], "start_seconds": ["16", "80"], "properties": ["water, flow, men", "power, increase, engine"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a jet engine roars as wind blows "], "question": "which entity is more powerful", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["zkKdxzNC97Y", "zj2R0XoFr5k"], "start_seconds": ["27", "50"], "properties": ["hard, surface, door", "airplane, boy, fly"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a door is opened and closed", "a woman speaks while a helicopter flies overhead "], "question": "which object is moving", "label": 1}, {"captions": ["multiple ducks quack continuously", "a duck quacks loudly and continuously"], "sample_ids": ["wfHeoPDLMaM", "vh30P49Po6s"], "start_seconds": 
["30", "30"], "properties": ["multiple, quack, continuously", "loud, continuous, quacks"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["ducks are quacking", "a duck is quacking loudly"], "question": "which duck is louder", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "people applaud and hoot and chat quietly"], "sample_ids": ["vXlk0lIQBFo", "wwyfGO2J4"], "start_seconds": ["470", "90"], "properties": ["wind, speak, vocalize", "people, applaud, hoot"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", null], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "someone is typing on a computer keyboard"], "sample_ids": ["tQWGZLItBXk", "v0x1odnXtP0"], "start_seconds": ["170", "210"], "properties": ["music, person, ding", "keyboard, type, computer"], "captions_pred_video": ["worms revolution 
screenshots", "how to make money on youtube in spanish"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a person is typing on a keyboard"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a church bell rings several times", "winds blows roughly as a vehicle races past"], "sample_ids": ["sUVVjE3Ucp8", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["ring, bell, several", "wind, blows, vehicle"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a church bell is ringing ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a baby laugh at a sputter", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sLUnaPT5gM8", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["laugh, sputter, baby", "two men, woman, birds"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["uqFtmnhuqA8", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["a, b, c", "water, radio, man"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man 
speaking over a radio?", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "distant men speak as a spray can nozzle is depressed"], "sample_ids": ["vh30P49Po6s", "rwtmaKiCcQU"], "start_seconds": ["30", "30"], "properties": ["loud, continuous, quacks", "nozzle, depressed, spray can"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "shows a man spraying paint on a wall with a spray gun"], "captions_pred_audio": ["a duck is quacking loudly", "spraying and people speaking"], "question": "which entity is silent", "label": 1}, {"captions": ["a helicopter engine idles continuously", "birds chirp as a train approaches"], "sample_ids": ["ugHJF0hfYkg", "xM4joTqDVp4"], "start_seconds": ["10", "160"], "properties": ["engine, idle, continuously", "bird, chirp, train"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack"], "captions_pred_audio": ["a helicopter is flying overhead ", "birds are chirping and a train is moving "], "question": "which entity is not a train?", "label": 0}, {"captions": ["a train horn blows as it passes by", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["zVacuqSb4LI", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["horn, blows, train", "motor noise, horn, siren"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["slZLHwNbbt4", "wDVMhEdTiVw"], "start_seconds": ["300", "30"], "properties": ["a, horn, run", "gun, shoot, water"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a gun", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "pigeons vocalize and birds chirp"], "sample_ids": ["yks4cLgIDMc", "uiS58TNyUiw"], "start_seconds": ["170", "430"], "properties": ["background, speaking, child", "vocalize, bird, chirp"], "captions_pred_video": ["footage of two kids wrestling on the floor", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking and a child is crying", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a man speaks as a machine runs", "the revving of an engine throttle followed by a man speaking"], "sample_ids": ["vD6lYD1l0BY", "tezvROoo4bs"], "start_seconds": ["330", "40"], "properties": ["a, machine, run", "audio, throttle, speaking"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a busy city street with cars parked on both sides of the 
road"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a car accelerates and revs while a man speaks "], "question": "which entity is about a man speaking as a machine runs?", "label": 0}, {"captions": ["people speak as gunfire rings out", "a woman speaks as she rubs two objects together"], "sample_ids": ["wqTCwqVRDlk", "vzxHnu-SFEw"], "start_seconds": ["80", "80"], "properties": ["gunfire, ring, speak", "two objects, woman, speak"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a duck quacks continuously"], "sample_ids": ["vh30P49Po6s", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["loud, continuous, quacks", "quacks, continuously, duck"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "of a man brushing his teeth with a toothbrush in his 
mouth"], "captions_pred_audio": ["a duck is quacking loudly", "a duck is quacking loudly"], "question": "which duck is louder", "label": 0}, {"captions": ["a man speaks as he moves silverware in a bowl", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["x6ijhqRY38s", "vfYTJq7nU"], "start_seconds": ["250", "130"], "properties": ["bowl, silverware, man", "rustling, ducks, quack"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a duck quacks and a woman speaks"], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["an small aircraft engine runs and a boy speaks", "continuous chugging with birds chirping in the background"], "sample_ids": ["xSKJGCItUWE", "xM4joTqDVp4"], "start_seconds": ["10", "160"], "properties": ["engine, run, boy", "background, chirp, birds"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "birds are chirping and a train is moving "], "question": "which entity has a boy speaking?", "label": 0}, {"captions": ["a weapon fires multiple times", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["sMC07Ucy7kg", "xfaoyyzw2WU"], "start_seconds": ["10", "180"], "properties": ["weapon, fire, multiple", "loud, jet engine, roar"], "captions_pred_video": ["footage is from a car's point of view", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a boat travels through the waves as 
the wind blows loudly and a man speaks over a radio", "some tunes played by whistling"], "sample_ids": ["tDVADusiIoc", "u6BnG6YZqJ4"], "start_seconds": ["60", "0"], "properties": ["wind, radio, waves", "tune, play, whistling"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a person whistling a song"], "question": "which entity is played by whistling", "label": 1}, {"captions": ["a train engine runs and a horn blows", "an infant crying as a woman laughs"], "sample_ids": ["zPX9o1uDiI", "xhmRY9yhC7c"], "start_seconds": ["40", "20"], "properties": ["engine, horn, run", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a male speaks over some small clicks", "a motorcycle idles loudly as wind blows"], "sample_ids": ["uXxVebHsGZ8", "v7jJS8aAyA"], "start_seconds": ["30", "10"], "properties": ["male, clicks, speak", "wind, blows, loudly"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a motorcycle engine is idling and vibrating"], "question": "which entity is louder", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a clock ticktocks"], "sample_ids": ["rqfQRErjfk8", "v-g-j2uTByM"], "start_seconds": ["170", "30"], "properties": ["crowd, cheers, applauds", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, 
{"captions": ["birds coo incessantly", "multiple people speak and children yell while water gurgles"], "sample_ids": ["yZrFNS7GFBQ", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["coo, bird, incessant", "multiple, people, yell"], "captions_pred_video": ["of the bird in the cage", null], "captions_pred_audio": ["an owl hoots in the background ", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks, then dials a rotary telephone", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tK4VlLsNxak", "wqZ135Ssz0"], "start_seconds": ["120", "60"], "properties": ["a, dial, telephone", "two men, woman, birds"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["some men converse over an engine running", "an infant crying frantically"], "sample_ids": ["sCiy7QS1U", "zwOBqeFTgiU"], "start_seconds": ["300", "30"], "properties": ["men, converse, engine", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "birds chirp and objects are moved around"], "sample_ids": ["vddP56-ogds", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["water, flow, laugh", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "insects buzz and a man speaks"], "question": 
"which entity is more active", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["t25U-v4k4ts", "tDVADusiIoc"], "start_seconds": ["40", "60"], "properties": ["a, chirps, bird", "water, radio, man"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["an infant crying as a woman laughs", "water pouring and bubbling"], "sample_ids": ["xhmRY9yhC7c", "uyRfq-jKPpo"], "start_seconds": ["20", "50"], "properties": ["a, laugh, infant", "water, bubbles, pouring"], "captions_pred_video": ["of a baby crying in a baby bouncer", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a baby cries and a woman speaks", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "an infant crying frantically"], "sample_ids": ["xZepNM9qcRA", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], 
"properties": ["background, motor, run", "cry, infant, frantically"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "of the baby crying in the car seat"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "a duck quacks continuously"], "sample_ids": ["vKrYfzleLB8", "vh30P49Po6s"], "start_seconds": ["110", "30"], "properties": ["a, ring, gunshots", "quacks, continuously, duck"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["yRx9txMcBl0", "tiDFTC-5vU"], "start_seconds": ["40", "30"], "properties": ["motors, tires, screech", "male, duck, laugh"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a vehicle accelerates 
squealing tires", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sd7xVssqlw", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["accelerates, tires, squealing", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a stream of water runs briefly"], "sample_ids": ["siJFXfGWgDk", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["a, bird, vehicle", "stream, water, run"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sd7xVssqlw", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["accelerates, tires, squealing", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "dishes cling together then a man begins to speak"], "sample_ids": ["wz7N8YRy74I", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["rooster, crow, background, men", "cling, speak, dishes"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "a man washing dishes in a 
commercial kitchen"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "a person screams glaringly"], "sample_ids": ["vzxHnu-SFEw", "xC8kbrKJmco"], "start_seconds": ["80", "0"], "properties": ["two objects, woman, speak", "glaringly, screams, person"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a goat is bleating "], "question": "which entity is more silent", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "a stream of water flows as people talk and wind blows"], "sample_ids": ["uoGVs9yUqY4", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["multiple, vocalize, wind", "stream, water, flow"], "captions_pred_video": ["for how to make a wooden shed door youtube", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping and flapping their 
wings with wind noise in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["ugHJF0hfYkg", "w34HjHr6gAY"], "start_seconds": ["10", "30"], "properties": ["loud, intense, propeller", "beeps, hit, woman"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a helicopter is flying overhead ", "a beep sounds followed by a child speaking"], "question": "which entity is quieter", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "water pouring and bubbling"], "sample_ids": ["ukxt9I7eMMg", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["food, pan, cook", "water, bubbles, pouring"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "wind blows as people chatter quietly"], "sample_ids": ["wRV8yMk886E", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["liquid, spray, nozzle", "wind, chatter, people"], "captions_pred_video": ["two cars are parked in a parking lot at night", "footage is blurry and out of focus"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["siJFXfGWgDk", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["man, woman, vehicle", "three men, wind, flow"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people", "label": 1}, {"captions": ["a man speaks as a machine runs", "water flows as men speak and yell"], "sample_ids": ["vD6lYD1l0BY", "vJ7JPEFhyLA"], "start_seconds": ["330", "16"], "properties": ["a, machine, run", "water, flow, men"], "captions_pred_video": ["game controller being held in the hands of the person", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking while the wind is 
blowing and water is splashing"], "question": "which entity has a man speaking as a machine runs?", "label": 0}, {"captions": ["a clock ticktocks", "wind blows in gusts as a woman speaks in the distance"], "sample_ids": ["v-g-j2uTByM", "uC9dtII1KDI"], "start_seconds": ["30", "150"], "properties": ["ticktocks, clock, ticktocks", "wind, gusts, distance"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "footage of a person riding a horse in a riding arena"], "captions_pred_audio": ["a clock is ticking loudly", "a woman is speaking with wind noise and breathing in the background "], "question": "which entity is not a clock?", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zofjfKhqLk8", "tdWhHV3X25Q"], "start_seconds": ["10", "60"], "properties": ["background, metal, clings", "applause, audience, yells"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["running water in a faucet with some clinks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["zNRChLjqcU", "xBxDz0CFVn0"], "start_seconds": ["220", "30"], "properties": ["water, faucet, run", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking with wind noise in the background "], "question": "which entity is flowing water", "label": 1}, {"captions": ["a man talks as several small engines run", "a man is snoring loudly and repeatedly"], "sample_ids": ["u9A6VZQCZpU", "sncRqQ67iJU"], "start_seconds": ["30", "460"], 
"properties": ["a, man, talk", "loud, repeatedly, man"], "captions_pred_video": [null, "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a person is snoring"], "question": "which man is louder", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a man speaks on a radio as wind blows"], "sample_ids": ["s4Uz1Ffgo04", "tDVADusiIoc"], "start_seconds": ["100", "60"], "properties": ["roars, background, people speaking", "man, radio, blows"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking on a radio?", "label": 1}, {"captions": ["people speak then an engine runs", "water pouring and bubbling"], "sample_ids": ["uMTTDZ2mb4", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["engine, run, people", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["people are talking and a 
car is driving by with wind noise in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["two frogs croak at each other", "a telephone rings followed by a woman talking"], "sample_ids": ["zg0X6BnhOLQ", "tGcFnX0GHI"], "start_seconds": ["410", "0"], "properties": ["two frogs, croak, at each other", "ring, talk, woman"], "captions_pred_video": ["footage of lightning in the sky at night", null], "captions_pred_audio": ["a frog is croaking", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a man speaks as a car is passing by"], "sample_ids": ["yYEVLuqEytU", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["grunt, slurp, background", "a, car, pass"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["bees buzz and wind blows", "people cheer as a vehicle engine revs"], "sample_ids": ["tMJne1a4AFI", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["bees buzz, wind blows, bees", "engine revs, vehicle, people"], "captions_pred_video": ["a swarm of bees on the ground", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a swarm of bees buzzing around", "a truck is 
revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "people applaud and hoot and chat quietly"], "sample_ids": ["tiDFTC-5vU", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["male, duck, laugh", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be at a party", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "a horn rings out as a machine runs by"], "sample_ids": ["zVacuqSb4LI", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["blares, fades, train", "a, horn, run"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a male is speaking and a duck quacks as 
others laugh"], "sample_ids": ["uEU-Hg5MTN8", "tiDFTC-5vU"], "start_seconds": ["27", "30"], "properties": ["a woman, laughs, animal", "male, duck, laugh"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["a person is snoring while sleeping", "people speak as gunfire rings out"], "sample_ids": ["vJrjSeP17yE", "wqTCwqVRDlk"], "start_seconds": ["40", "80"], "properties": ["a person is sleeping, snoring, person", "gunfire, ring, speak"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and a gun is fired"], "question": "which entity is more quiet", "label": 0}, {"captions": ["people cheer as a vehicle engine revs", "vehicles pass by on a roadway"], "sample_ids": ["xjhAnI2q6hM", "tgbONvsP47Y"], "start_seconds": ["6", "0"], "properties": ["engine revs, vehicle, people", "pass, vehicle, roadway"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a car is driving on the road "], "question": "which vehicle is moving", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "winds blows roughly as a vehicle races past"], "sample_ids": ["sZPuqDgX2V0", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["engine, accelerate, intercom", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a jet engine roars and wind blows "], "question": "which entity 
is a vehicle", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["xKB8O8LTs6s", "uEU-Hg5MTN8"], "start_seconds": ["70", "27"], "properties": ["music, radio, gunshots", "a woman, laughs, animal"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking on a radio?", "label": 0}, {"captions": ["a man speaking with light rustling", "females talk and laugh over gusting wind"], "sample_ids": ["zOZleIRqZm4", "un9VQlzgZM"], "start_seconds": ["80", "5"], "properties": ["light, rustling, man", "females, talk, laugh"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is a group of people", "label": 1}, {"captions": ["a man talks as several small engines run", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["u9A6VZQCZpU", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["a, man, talk", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["an aircraft engine runs", "a stream of water runs briefly"], "sample_ids": ["yLCORCnd35Q", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["engine, aircraft, runs", "stream, water, run"], 
"captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a person sniffs and sneezes", "someone is typing on a computer keyboard"], "sample_ids": ["uRlbY6aoBU", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["sneezes, person, sniffs", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is sneezing ", "a person is typing on a keyboard"], "question": "which person is typing on a computer keyboard", "label": 1}, {"captions": ["a beep occurs briefly", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["xtWeJ56-U-g", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["beep, occur, briefly", "music, gunfire, explosion"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["two frogs croak at each other", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["zg0X6BnhOLQ", "sLUnaPT5gM8"], "start_seconds": ["410", "0"], "properties": ["two frogs, croak, at each other", "loud, laughter, intermittent"], "captions_pred_video": ["footage of lightning in the sky at night", "of a baby laying on his stomach in a blue shirt and diaper"], 
"captions_pred_audio": ["a frog is croaking", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a siren comes to life as a horn blares", "a vehicle engine accelerating then running on idle"], "sample_ids": ["u--KhUW8l1Y", "vYkA3cfXp5Q"], "start_seconds": ["0", "30"], "properties": ["horn, siren, life", "engine, accelerate, idle"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a clock ticktocks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["v-g-j2uTByM", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["ticktocks, clock, ticktocks", "engine, idle, woman"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a clock is ticking loudly", "a woman is speaking and a subway train is moving "], "question": "which entity is not stationary", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["xM4joTqDVp4", "y2bVZ7rz-5M"], "start_seconds": ["160", "280"], "properties": ["background, chirp, birds", "motor noise, horn, siren"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["rustling occurs, 
ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "someone is typing on a computer keyboard"], "sample_ids": ["vfYTJq7nU", "v0x1odnXtP0"], "start_seconds": ["130", "210"], "properties": ["rustling, ducks, quack", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["wP8ZKrlx3oA", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["rain, storm, thunder", "men, talk, cars"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone snores nearby", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["spJCm8tD9Zo", "uZesmtKZGSw"], "start_seconds": ["90", "250"], "properties": ["someone snores, nearby, someone", "men, talk, cars"], "captions_pred_video": ["of a man 
laying on the ground with his mouth open", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a woman sneezes then speaks", "a toilet flushes and a female speaks"], "sample_ids": ["x4dZyf9Gbj0", "yaln9y8I7ms"], "start_seconds": ["130", "230"], "properties": ["sneezes, speaks, woman", "female, flushes, toilet"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman sneezes and speaks", "a toilet flushes and a man speaks"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a vehicle engine revs and tires squeal", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yDoT73BWsdA", "sSMl2vc3ek"], "start_seconds": ["10", "20"], "properties": ["engine revs, tires squeal, vehicle", "loud, multiple, distance"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp, a woman speaks, and insects buzz", "a infant makes noise and is excited"], "sample_ids": ["t97k0cejSQE", "wIJK3-5y0kA"], "start_seconds": ["250", "30"], "properties": ["sound, 
chirp, buzz", "noise, excited, infant"], "captions_pred_video": ["a bee on a purple thistle flower", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a baby cries and a woman speaks"], "question": "which entity makes a lot of noise", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "an insect buzzes around continuously"], "sample_ids": ["su6FAOcOA8c", "v25l1jef3JY"], "start_seconds": ["4", "0"], "properties": ["engine, run, woman", "buzzes, continuously, insect"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "wind blowing followed by a zoom"], "sample_ids": ["sDSppXIlJrs", "vr8ZXjEBhMQ"], "start_seconds": ["27", "150"], "properties": ["microphone, water, wind", "wind, blow, zoom"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["the wind is blowing and water is splashing", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a video of wind blowing?", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a helicopter engine runs continuously"], "sample_ids": ["yRx9txMcBl0", "ugHJF0hfYkg"], "start_seconds": ["40", "10"], "properties": ["motors, tires, screech", "engine, running, continuously"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods 
cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a helicopter is flying overhead "], "question": "which entity is running continuously", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "an engine runs loudly"], "sample_ids": ["tiDFTC-5vU", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["male, duck, laugh", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a clock ticktocks"], "sample_ids": ["zcDwZ6W7E3E", "v-g-j2uTByM"], "start_seconds": ["180", "30"], "properties": ["man, speak, motorcycles", "ticktocks, clock, ticktocks"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["y2bVZ7rz-5M", "uEU-Hg5MTN8"], "start_seconds": ["280", "27"], "properties": ["engine, horn, siren", "a woman, laughs, animal"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "of a girl wearing a pig mask and a boy 
hugging her"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a woman is speaking and a baby is crying"], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a person is burping then speaks and laughs", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wAAkbZToh8", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["burp, laugh, speak", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man burps and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a toilet flushes and water drains"], "sample_ids": ["vzxHnu-SFEw", "sfAvvZwdLCY"], "start_seconds": ["80", "20"], "properties": ["two objects, woman, speak", "water drains, flushes, water"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a toilet is 
flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "people cheer as a vehicle engine revs"], "sample_ids": ["yajyRTUQk3U", "xjhAnI2q6hM"], "start_seconds": ["400", "6"], "properties": ["noise, woman, speak", "engine revs, vehicle, people"], "captions_pred_video": ["- a woman cooking in the kitchen", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a goat screams and people speak in the background", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["xC8kbrKJmco", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["background, goat, scream", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a goat is bleating ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has a more natural background", "label": 1}, {"captions": ["a large crowd cheers and applauds", "vehicles pass by on a roadway"], "sample_ids": ["rqfQRErjfk8", "tgbONvsP47Y"], "start_seconds": ["170", "0"], "properties": ["crowd, cheers, applauds", "pass, vehicle, roadway"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "a toilet flushes and water drains"], "sample_ids": ["tw76HGONaKg", "sfAvvZwdLCY"], "start_seconds": ["570", "20"], "properties": ["music, click, man", "water drains, flushes, water"], "captions_pred_video": ["game you are currently playing on microsoft 
xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a clock ticktocks in wind"], "sample_ids": ["zkKdxzNC97Y", "yVumC9TGknc"], "start_seconds": ["27", "30"], "properties": ["loud, bang, noise", "ticktocks, clock, wind"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "game title screen of the game shadow of the colossus on sony playstation 2"], "captions_pred_audio": ["a door is opened and closed", "a series of beeps and chirps"], "question": "which entity is quieter", "label": 1}, {"captions": ["white noise and birds chirping", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wRBHTgrbiwg", "xfaoyyzw2WU"], "start_seconds": ["50", "180"], "properties": ["noise, white, chirping", "loud, jet engine, roar"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["birds are chirping and insects are buzzing", 
"an aircraft engine roars and a man speaks "], "question": "which noise is louder", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yVumC9TGknc", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["humming, clock, birds", "engine, laugh, loud"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a series of beeps and chirps", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as a machine runs", "waves crash against a shoreline and people speak"], "sample_ids": ["vD6lYD1l0BY", "yFB25fqfU8I"], "start_seconds": ["330", "300"], "properties": ["a, machine, run", "wave, crash, shoreline"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["w8uLijTqtlU", "y8WEcpOlT3I"], "start_seconds": ["70", "40"], "properties": ["wind, microphone, noise", "harsh, wind, blows"], "captions_pred_video": ["footage is blurry and shaky", "on how to use a sewing machine youtube"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking with wind noise in the background "], "question": "which entity is a recording of a harsh wind blowing?", "label": 1}, {"captions": ["an airplane engine runs", "people applaud and hoot and chat quietly"], "sample_ids": ["yVPZ2MNWpms", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["engine, airplane, runs", "people, 
applaud, hoot"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", null], "captions_pred_audio": ["a car is driving by on the road ", "people are clapping and speaking with background noise "], "question": "which entity is a performance", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a church bell rings several times"], "sample_ids": ["wyllXV6PjKo", "sUVVjE3Ucp8"], "start_seconds": ["30", "0"], "properties": ["a baby, a woman, a man", "ring, bell, several"], "captions_pred_video": [null, "the video shows a stone wall with a clock on top of it and a bench in front of it"], "captions_pred_audio": ["a woman speaks and a baby cries", "a church bell is ringing "], "question": "which entity is silent", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "waves crash against a shoreline and people speak"], "sample_ids": ["sHbXC6na9hg", "yFB25fqfU8I"], "start_seconds": ["0", "300"], "properties": ["a person, saw, wood", "wave, crash, shoreline"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "footage of a person surfing in the ocean"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be in a natural environment", "label": 1}, {"captions": ["a heavy rain falls endlessly", "water flows as men speak and yell"], "sample_ids": ["wP8ZKrlx3oA", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["heavy, rain, fall", "water, flow, men"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of water flowing?", "label": 1}, {"captions": 
["a clock alarm sounds and gears turn", "birds chirp and objects are moved around"], "sample_ids": ["w2M4i1mklOA", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["alarm, gears, turn", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of an antique clock", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "insects buzz and a man speaks"], "question": "which entity is more like a clock", "label": 0}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "the revving of an engine throttle followed by a man speaking"], "sample_ids": ["su6FAOcOA8c", "tezvROoo4bs"], "start_seconds": ["4", "40"], "properties": ["engine, idle, woman", "audio, throttle, speaking"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a busy city street with cars parked on both sides of the road"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a car accelerates and revs while a man speaks "], "question": "which entity is about a bus engine?", "label": 0}, {"captions": ["two frogs croak at each other", "a car speeding up in the distance"], "sample_ids": ["zg0X6BnhOLQ", "u0TrcHhkPQ"], "start_seconds": ["410", "20"], "properties": ["two frogs, croak, at each other", "distance, car, speed"], "captions_pred_video": ["footage of lightning in the sky at night", null], "captions_pred_audio": ["a frog is croaking", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["v0x1odnXtP0", "tDVADusiIoc"], "start_seconds": ["210", "60"], "properties": ["keyboard, type, computer", "water, radio, man"], "captions_pred_video": ["how to make money on youtube in spanish", "a person riding on the 
back of a sailboat in rough seas"], "captions_pred_audio": ["a person is typing on a keyboard", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a person?", "label": 0}, {"captions": ["an engine runs and a man speaks", "a duck quacks continuously"], "sample_ids": ["yT5WfYMRr-U", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["engine, run, man", "quacks, continuously, duck"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "a man speaks as a motor runs in the background"], "sample_ids": ["zCrAfDfv6-A", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["person, mouse, click", "background, motor, run"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a person whistles a song", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["yJ0TePmaOo", "uZesmtKZGSw"], "start_seconds": ["390", "250"], "properties": ["two hard objects, man, speak", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro 
gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["running water in a faucet with some clinks", "a woman speaks and other women and a man talk with her"], "sample_ids": ["zNRChLjqcU", "vbpKkWvfOu4"], "start_seconds": ["220", "560"], "properties": ["water, faucet, run", "a, woman, man"], "captions_pred_video": [null, "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["water is running from a faucet into a sink", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["running water in a faucet with some clinks", "plastic is tapped on while someone speaks"], "sample_ids": ["zNRChLjqcU", "wvKpEYswXO0"], "start_seconds": ["220", "150"], "properties": ["water, faucet, run", "plastic, tap, speak"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["water is running from a faucet into a sink", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "water splashes as an animal walks through"], "sample_ids": ["w8uLijTqtlU", "w1ir-sZ3Im8"], "start_seconds": ["70", "90"], "properties": ["wind, microphone, noise", "animal, water, splashes"], "captions_pred_video": 
["footage is blurry and shaky", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["the wind is blowing strongly", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be a recording", "label": 1}, {"captions": ["small dogs yip and bark sharply", "an infant crying frantically"], "sample_ids": ["v-wcQf4BDY0", "zwOBqeFTgiU"], "start_seconds": ["120", "30"], "properties": ["bark, yip, sharply", "cry, infant, frantically"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "of the baby crying in the car seat"], "captions_pred_audio": ["a dog barks and growls", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["material crumbles into a microphone", "some people speak"], "sample_ids": ["vofpvUo6NAw", "vbZ-0lGPneg"], "start_seconds": ["220", "30"], "properties": ["material, crumbles, microphone", "some people speak English, some people speak Spanish, some people speak French"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a woman is speaking and a dog is whimpering"], "question": "which entity is not a person", "label": 0}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["tDlfY3nmx1A", "uEU-Hg5MTN8"], "start_seconds": ["160", "27"], "properties": ["applause, laugh, man", "a woman, laughs, animal"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a woman is speaking and a baby is crying"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a vehicle engine 
accelerating then running on idle", "an airplane engine runs"], "sample_ids": ["vYkA3cfXp5Q", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["engine, accelerate, idle", "engine, airplane, runs"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["an engine is idling", "a car is driving by on the road "], "question": "which entity has a running engine", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["uYT5gxnyMWM", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["a, scream, girl", "stream, water, flow"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with wind noise in the background "], "question": "which entity is a moving object", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a car speeding up in the distance"], "sample_ids": ["sZPuqDgX2V0", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["commentator, race, track", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a race car accelerates and revs its engine "], "question": "which car is speeding up in the distance", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["w2JXXIAdUdg", "uZesmtKZGSw"], "start_seconds": ["10", "250"], "properties": ["snoring, distance, person", "men, talk, cars"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more likely to be a dream", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vzxHnu-SFEw", "uZesmtKZGSw"], "start_seconds": ["80", "250"], "properties": ["two objects, woman, speak", "men, talk, cars"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro 
gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["bees buzz and wind blows", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tMJne1a4AFI", "w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["bees buzz, wind blows, bees", "wind, blow, vehicle"], "captions_pred_video": ["a swarm of bees on the ground", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blows?", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["y8WEcpOlT3I", "xKB8O8LTs6s"], "start_seconds": ["40", "70"], "properties": ["harsh, wind, blows", "music, gunfire, explosion"], "captions_pred_video": ["on how to use a sewing machine youtube", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and 
duck calls being blown"], "sample_ids": ["yFB25fqfU8I", "vfYTJq7nU"], "start_seconds": ["300", "130"], "properties": ["wave, crash, shoreline", "rustling, ducks, quack"], "captions_pred_video": ["footage of a person surfing in the ocean", null], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a duck quacks and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yRx9txMcBl0", "sSMl2vc3ek"], "start_seconds": ["40", "20"], "properties": ["motors, tires, screech", "loud, multiple, distance"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["xyx6eNVEYRY", "tw76HGONaKg"], "start_seconds": ["380", "570"], "properties": ["loud, engine, muffles", "A, game, keyboard"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina 
of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a man speaks and types on a computer keyboard "], "question": "which man is speaking", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["zk-xJGQU8-4", "w5W5Kqtc8E"], "start_seconds": ["130", "100"], "properties": ["food, man, woman", "wind, blow, vehicle"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", null], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["a car speeding up in the distance", "an insect buzzes around continuously"], "sample_ids": ["u0TrcHhkPQ", "v25l1jef3JY"], "start_seconds": ["20", "0"], "properties": ["distance, car, speed", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a fly is buzzing around a microphone "], "question": "which entity is moving faster", "label": 0}, {"captions": ["a woman speaks and is crumpling paper", "a child speaks in closed space"], 
"sample_ids": ["xvDdE3zNf8Y", "yW6FWLSLkx4"], "start_seconds": ["120", "40"], "properties": ["A, crumple, paper", "child, space, speak"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman speaks and crumples paper", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a car accelerates and wind blows"], "sample_ids": ["tQWGZLItBXk", "u0TrcHhkPQ"], "start_seconds": ["170", "20"], "properties": ["music, kid, speak", "accelerates, wind, blows"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["wRV8yMk886E", "yDoT73BWsdA"], "start_seconds": ["0", "10"], "properties": ["liquid, spray, nozzle", "engine, revs, vehicle"], "captions_pred_video": ["two cars are parked in a parking lot at night", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "people speak as gunfire rings out"], "sample_ids": ["sQGXqGcwOTc", "wqTCwqVRDlk"], "start_seconds": ["3", "80"], "properties": ["cling, speak, dishes", "gunfire, ring, speak"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man 
is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["y8WEcpOlT3I", "rqu8iB22IY"], "start_seconds": ["40", "5"], "properties": ["wind, speak, buffeting", "sound, repeats, laugh"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a dog barks and a man speaks while music plays "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["continuous snoring", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["sLkeqCDJIyw", "uEU-Hg5MTN8"], "start_seconds": ["120", "27"], "properties": ["loud, snoring, noise", "a woman, laughs, animal"], "captions_pred_video": [", what is the man doing on the couch? sleeping", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is not a person", "label": 0}, {"captions": ["a helicopter engine idles continuously", "continuous snoring"], "sample_ids": ["ugHJF0hfYkg", "sLkeqCDJIyw"], "start_seconds": ["10", "120"], "properties": ["engine, idle, continuously", "loud, snoring, noise"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", ", what is the man doing on the couch? 
sleeping"], "captions_pred_audio": ["a helicopter is flying overhead ", "a person is snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["sncRqQ67iJU", "tDVADusiIoc"], "start_seconds": ["460", "60"], "properties": ["loud, repeatedly, man", "water, radio, man"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a person is snoring", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a man?", "label": 0}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "a car accelerates and wind blows"], "sample_ids": ["slZLHwNbbt4", "u0TrcHhkPQ"], "start_seconds": ["300", "20"], "properties": ["clap, distance, horn", "accelerates, wind, blows"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", null], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["xNMovAf3o50", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["rain, thunder, music", "a woman, laughs, animal"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a movie", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "water 
splashes and a door squeaks"], "sample_ids": ["sjlVMgdGSK0", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["car, revving, loudly", "sound, splash, door"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a dog barks and taps with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["uPDn2BFTHk", "vfYTJq7nU"], "start_seconds": ["140", "130"], "properties": ["lady, laugh, baby", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a duck quacks and a woman speaks"], "question": "which entity is about a baby?", "label": 0}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["uPDn2BFTHk", "wz7N8YRy74I"], "start_seconds": ["140", "30"], "properties": ["lady, laugh, baby", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "wind blowing followed by a zoom"], "sample_ids": ["vddP56-ogds", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["water, flow, laugh", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "wind blows and a chainsaw cuts through wood "], "question": "which entity 
is more likely to blow", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "a person speaks briefly"], "sample_ids": ["zF8yoL0rkbI", "zOZleIRqZm4"], "start_seconds": ["30", "80"], "properties": ["engine, run, someone", "person, talk, brief"], "captions_pred_video": ["footage of the traffic on the street at night", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a man is speaking with crickets chirping in the background"], "question": "which entity is talking", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "an insect buzzes around continuously"], "sample_ids": ["sQGXqGcwOTc", "v25l1jef3JY"], "start_seconds": ["3", "0"], "properties": ["cling, speak, dishes", "buzzes, continuously, insect"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "a motor slows to a stopover traffic noises"], "sample_ids": ["y8WEcpOlT3I", "zofjfKhqLk8"], "start_seconds": ["40", "10"], "properties": ["wind, speak, buffeting", "noise, stop, motor"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage of a man using a machine to cut a piece of wood"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a large engine is running and a bell is ringing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "people speak as gunfire rings out"], "sample_ids": ["zhx6hoYrHeI", "wqTCwqVRDlk"], "start_seconds": ["160", "80"], "properties": ["engine, sputter, rough", "gunfire, ring, speak"], "captions_pred_video": ["footage of a man working on a 
motorcycle's tire", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["an airplane engine spools and people speak", "a crowd yells, reacts and applauds"], "sample_ids": ["wTjoRj1se3U", "wztCSUxOf8"], "start_seconds": ["390", "130"], "properties": ["airplane, engine, spool", "a crowd, yells, applauds"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", null], "captions_pred_audio": ["a jet engine is running and people are talking", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "an airplane engine spools and people speak"], "sample_ids": ["xKB8O8LTs6s", "wTjoRj1se3U"], "start_seconds": ["70", "390"], "properties": ["music, radio, gunshots", "airplane, engine, spool"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a jet engine is running and people are talking"], "question": "which entity is a video of a plane engine spooling?", "label": 1}, {"captions": ["a door opens and birds chirp", "water rushes and then a vehicle zooms past"], "sample_ids": ["yeFvk9x0wWI", "s4Uz1Ffgo04"], "start_seconds": ["30", "100"], "properties": ["door, open, birds", "water, rushes, vehicle"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is more likely to be in a movie", 
"label": 1}, {"captions": ["a goat screams and people speak in the background", "bird squawks are accompanied by a man and woman speaking"], "sample_ids": ["xC8kbrKJmco", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["background, goat, scream", "man, woman, squawks"], "captions_pred_video": [null, null], "captions_pred_audio": ["a goat is bleating ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is accompanied by a man and woman speaking", "label": 1}, {"captions": ["a woman and man are speaking", "a toilet flushes and water drains"], "sample_ids": ["vbpKkWvfOu4", "sfAvvZwdLCY"], "start_seconds": ["560", "20"], "properties": ["two people, speaking, woman, man", "water drains, flushes, water"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vbpKkWvfOu4", "xBxDz0CFVn0"], "start_seconds": ["560", "30"], "properties": ["a, man, speaks", "stream, water, flow"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking with wind noise in the background "], "question": "which entity is a video of a woman speaking and 
then a man speaking?", "label": 0}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "water pouring and bubbling"], "sample_ids": ["zY3icUyMdh8", "uyRfq-jKPpo"], "start_seconds": ["20", "50"], "properties": ["dog, bark, engine", "water, bubbles, pouring"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "people speak then an engine runs"], "sample_ids": ["yDoT73BWsdA", "uMTTDZ2mb4"], "start_seconds": ["10", "30"], "properties": ["engine, revs, vehicle", "engine, run, people"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "people are talking and a car is driving by with wind noise in the background "], "question": "which entity has a vehicle with an engine?", "label": 0}, {"captions": ["a man talks while vehicles pass by", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["sK4u5T8hW78", 
"w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "beeps, hit, woman"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a beep sounds followed by a child speaking"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["someone whistles briefly", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["uFoga8sHpiw", "w5W5Kqtc8E"], "start_seconds": ["90", "100"], "properties": ["sound, duration, pitch", "wind, blow, vehicle"], "captions_pred_video": ["footage of a bird in a cage", null], "captions_pred_audio": ["a person whistles a song", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a longer duration", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a clock ticktocks"], "sample_ids": ["vfYTJq7nU", "v-g-j2uTByM"], "start_seconds": ["130", "30"], "properties": 
["rustling, ducks, quack", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "a toilet flushes and a female speaks"], "sample_ids": ["x6ijhqRY38s", "yaln9y8I7ms"], "start_seconds": ["250", "230"], "properties": ["bowl, silverware, man", "female, flushes, toilet"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a toilet flushes and a man speaks"], "question": "which entity is about a toilet?", "label": 1}, {"captions": ["a toilet flushes and water drains", "someone is typing on a computer keyboard"], "sample_ids": ["sfAvvZwdLCY", "v0x1odnXtP0"], "start_seconds": ["20", "210"], "properties": ["water drains, flushes, water", "keyboard, type, computer"], "captions_pred_video": ["footage of the toilet in the bathroom", "how to make money on youtube in spanish"], "captions_pred_audio": ["a toilet is flushed", "a person is typing on a keyboard"], "question": "which object is used to type on a computer", "label": 1}, {"captions": ["a beep occurs briefly", "a clock ticktocks"], "sample_ids": ["xtWeJ56-U-g", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["beep, occur, briefly", "ticktocks, clock, ticktocks"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["mechanisms are ticking and a beep 
is heard ", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a horn rings out as a machine runs by"], "sample_ids": ["sfAvvZwdLCY", "slZLHwNbbt4"], "start_seconds": ["20", "300"], "properties": ["water drains, flushes, water", "a, horn, run"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a toilet is flushed", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a child speaks in closed space"], "sample_ids": ["xjvTpk2Zpr8", "yW6FWLSLkx4"], "start_seconds": ["70", "40"], "properties": ["engine, run, wind", "child, space, speak"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "someone is typing on a computer keyboard"], "sample_ids": ["yeFvk9x0wWI", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["chirp, twitter, clatter", "keyboard, type, computer"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "how to make money on youtube in spanish"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a person is typing on a keyboard"], "question": "which entity is typing", "label": 1}, {"captions": ["a baby laugh at a sputter", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["sLUnaPT5gM8", "xfaoyyzw2WU"], "start_seconds": ["0", "180"], "properties": ["laugh, sputter, baby", "loud, jet engine, roar"], 
"captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vSeGhaZt-aI", "vbZ-0lGPneg"], "start_seconds": ["50", "30"], "properties": ["water, bubbles, speak", "a woman, a television program, a bird"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "dishes cling together then a man begins to speak"], "sample_ids": ["ugHJF0hfYkg", "sQGXqGcwOTc"], "start_seconds": ["10", "3"], "properties": ["engine, running, continuously", "cling, speak, dishes"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a helicopter is flying overhead ", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "birds chirp and objects are moved around"], "sample_ids": ["rwTERCUno", "yPUYU6t3rwo"], "start_seconds": ["90", "370"], "properties": ["engine, idle, sputter", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["an engine is idling and vibrating", "insects buzz and a man speaks"], 
"question": "which entity is not a person?", "label": 0}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "winds blows roughly as a vehicle races past"], "sample_ids": ["yYEVLuqEytU", "xjvTpk2Zpr8"], "start_seconds": ["40", "70"], "properties": ["animal, pig, background", "wind, blows, vehicle"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "a duck quacks continuously"], "sample_ids": ["yks4cLgIDMc", "vh30P49Po6s"], "start_seconds": ["170", "30"], "properties": ["background, speaking, child", "quacks, continuously, duck"], "captions_pred_video": ["footage of two kids wrestling on the floor", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and a child is crying", "a duck is quacking loudly"], "question": "which entity is speaking", "label": 0}, {"captions": ["some men converse over an engine running", "people cheer as a vehicle engine revs"], "sample_ids": ["sCiy7QS1U", "xjhAnI2q6hM"], "start_seconds": ["300", "6"], "properties": ["men, converse, engine", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a truck is revving its engine and a man is speaking "], "question": "which entity shows people cheering?", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["x6ijhqRY38s", "zl9Dqx-j7q4"], "start_seconds": ["250", "6"], "properties": ["bowl, silverware, 
man", "engine, laugh, loud"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a jet engine roars "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["w6RTHR6AeAg", "w5W5Kqtc8E"], "start_seconds": ["40", "100"], "properties": ["call, owl, screech", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["zl9Dqx-j7q4", "sapQIQUhFc"], "start_seconds": ["6", "280"], "properties": ["engine, laugh, loud", "liquid, flow, distance"], "captions_pred_video": ["footage of a man driving a car in the dark", null], "captions_pred_audio": ["a jet engine roars ", "a man is speaking and a stream is flowing in the background "], "question": "which entity is more distant", "label": 1}, {"captions": ["a stream runs then someone speaks", "a woman speaks and is crumpling paper"], "sample_ids": ["wbHTKEJZyhc", "xvDdE3zNf8Y"], "start_seconds": ["20", "120"], "properties": ["stream, run, someone", "A, crumple, paper"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video 
footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a woman speaks and crumples paper"], "question": "which entity is crumpling paper", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "someone is typing on a computer keyboard"], "sample_ids": ["u7C-AEBQM", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["ticks, rhythmic, quiet", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a ticktock of a clock", "a person is typing on a keyboard"], "question": "which is quieter", "label": 1}, {"captions": ["a person sniffles and sneezes", "a woman speaks as she rubs two objects together"], "sample_ids": ["uRlbY6aoBU", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["sneezes, sniffles, person", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is sneezing ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 0}, {"captions": ["someone snores nearby", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["spJCm8tD9Zo", "uZesmtKZGSw"], "start_seconds": ["90", "250"], "properties": ["someone snores, nearby, someone", "men, talk, cars"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an emergency siren wails as it passes", "some tunes played by whistling"], "sample_ids": ["vGj1XLJvNrw", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], 
"properties": ["wails, wails, pass", "tune, play, whistling"], "captions_pred_video": ["footage of a police car driving down a city street", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a person whistling a song"], "question": "which entity is not playing a tune?", "label": 0}, {"captions": ["people speak and laugh as a child speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["sa6TLVbooCc", "tDVADusiIoc"], "start_seconds": ["240", "60"], "properties": ["people, laugh, child", "water, radio, man"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a child speaking?", "label": 0}, {"captions": ["birds chirps while a siren signals in the distance", "a car speeding up in the distance"], "sample_ids": ["uKCSGgof8gI", "u0TrcHhkPQ"], "start_seconds": ["12", "20"], "properties": ["chirps, distance, signal", "distance, car, speed"], "captions_pred_video": ["footage of a street in a small town on a sunny day", null], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a train engine runs and a horn blows", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["zPX9o1uDiI", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["engine, horn, run", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a vehicle?", 
"label": 0}, {"captions": ["two women and a man talk while a kid cries", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wyllXV6PjKo", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["a kid, talk, cry", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman speaks and a baby cries", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "an insect buzzes around continuously"], "sample_ids": ["wvKpEYswXO0", "v25l1jef3JY"], "start_seconds": ["150", "0"], "properties": ["water, tap, run", "buzzes, continuously, insect"], "captions_pred_video": ["of the person preparing food in the kitchen", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a person is snoring while sleeping", "a man speaks as a motor runs in the background"], "sample_ids": ["vJrjSeP17yE", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["a person is sleeping, snoring, person", "background, motor, run"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a person snoring loudly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 0}, {"captions": ["multiple birds chirp and an animal grunts", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["tDlysoZiA1I", "tDlysoZiA1I"], "start_seconds": ["0", "0"], "properties": ["animal, grunt, multiple", "animal, grunts, chirps"], 
"captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "birds are chirping and a rooster is crowing "], "question": "which entity has more grunts", "label": 1}, {"captions": ["a cat meows and children speak", "material crumbles into a microphone"], "sample_ids": ["x5cuQjOdM3E", "vofpvUo6NAw"], "start_seconds": ["30", "220"], "properties": ["cat, speak, children", "material, crumbles, microphone"], "captions_pred_video": ["a black background with an airplane flying in the sky", "person wrapping a toy car in a plastic bag"], "captions_pred_audio": ["a cat meows and a woman speaks", "paper is being crumpled and crinkled"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wqUmIEzuNz4", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["frog, bird, vocalize", "loud, multiple, distance"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", null], "captions_pred_audio": ["a cat meows and rustles", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sG7TyPnFDR0", "su6FAOcOA8c"], "start_seconds": ["180", "4"], "properties": ["beeps, machine, smoke alarm", "engine, idle, woman"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["multiple people speak and children 
yell while water gurgles", "an infant crying and a woman speaking with some distant murmuring"], "sample_ids": ["vb1fPSDI4c", "smDKStoHBJo"], "start_seconds": ["30", "0"], "properties": ["multiple, people, yell", "a, infant, speaking"], "captions_pred_video": [null, "a man holding a crying baby in his arms"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a baby is crying and a woman is speaking"], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "an animal quacks rapidly"], "sample_ids": ["vfYTJq7nU", "vh30P49Po6s"], "start_seconds": ["130", "30"], "properties": ["rustling, ducks, quack", "animal, quacks, rapidly"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a duck is quacking loudly"], "question": "which entity is a duck?", "label": 0}, {"captions": ["a beep repeats multiple times", "a duck quacks loudly and continuously"], "sample_ids": ["y682ml90jGw", "vh30P49Po6s"], "start_seconds": ["11", "30"], "properties": ["beep, repeat, multiple", "loud, continuous, quacks"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a beeping sound is being made ", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a clock ticktocks in wind", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["yVumC9TGknc", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["ticktocks, clock, wind", "loud, jet engine, roar"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a series of beeps and chirps", "an aircraft engine roars and a 
man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a man speaks as a car is passing by"], "sample_ids": ["wvKpEYswXO0", "sK4u5T8hW78"], "start_seconds": ["150", "30"], "properties": ["sound, water, running", "a, car, pass"], "captions_pred_video": ["of the person preparing food in the kitchen", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wyllXV6PjKo", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["a baby, a woman, a man", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking and a crowd is clapping"], "question": "which entity has more people", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "water splashes as an animal walks through"], "sample_ids": ["sWZzXuWYY", "w1ir-sZ3Im8"], "start_seconds": ["420", "90"], "properties": ["male, clanks, thumps", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a sewing machine runs 
and a man speaks", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["xjvTpk2Zpr8", "xKB8O8LTs6s"], "start_seconds": ["70", "70"], "properties": ["engine, run, wind", "music, gunfire, explosion"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a jet engine roars and wind blows ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a movie?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a vehicle accelerates squealing tires"], "sample_ids": ["sfAvvZwdLCY", "sd7xVssqlw"], "start_seconds": ["20", "50"], "properties": ["water drains, flushes, water", "accelerates, tires, squealing"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["xOZfdgAgJ9o", "vbZ-0lGPneg"], "start_seconds": ["40", "30"], "properties": ["woman, whimpering, speaking", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a dog is whimpering"], "question": "which woman is speaking", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a stream of water flows as people talk and wind blows"], "sample_ids": ["xKB8O8LTs6s", "xBxDz0CFVn0"], 
"start_seconds": ["70", "30"], "properties": ["music, radio, gunshots", "stream, water, flow"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage is blurry and out of focus"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "a man speaks as a car is passing by"], "sample_ids": ["vveS8HT7Uog", "sK4u5T8hW78"], "start_seconds": ["100", "30"], "properties": ["a man, objects, speak", "a, car, pass"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a man is speaking with background noise and breathing sounds "], "question": "which object is rubbed together", "label": 0}, {"captions": ["a baby cries and a woman speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["tMbMDvT50j8", "wwyfGO2J4"], "start_seconds": ["12", "90"], "properties": ["a, cry, woman", "people, applaud, hoot"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", 
"people converse as a motor runs and air brakes hiss"], "sample_ids": ["xyL9F5VrjkE", "zFjIWfSD-4"], "start_seconds": ["20", "410"], "properties": ["wind, blows, vehicle", "People, motor, brakes"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "vehicles pass by on a roadway"], "sample_ids": ["xl2PIWyXaM", "tgbONvsP47Y"], "start_seconds": ["160", "0"], "properties": ["chirp, man, younger person", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["birds are chirping and people are talking", "a car is driving on the road "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vlS6YMeWAPo", "xfaoyyzw2WU"], "start_seconds": ["40", "180"], "properties": ["sheep, baa, birds", "loud, jet engine, roar"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a goat bleats and birds chirp", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vbr9mHKc8WM", "sLUnaPT5gM8"], "start_seconds": ["40", "0"], "properties": ["noise, loudness, engine", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["an engine is idling", "a baby is laughing 
and breathing while a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "some men converse over an engine running"], "sample_ids": ["xyL9F5VrjkE", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["wind, blows, vehicle", "men, converse, engine"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks while water drains", "a drill drills through something then people begin laughing"], "sample_ids": ["vSeGhaZt-aI", "tEE3MpBt1sg"], "start_seconds": ["50", "50"], "properties": ["water, drain, man", "drill, something, laugh"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "people are laughing breathing and speaking with background noise "], "question": "which entity is about a drill?", "label": 1}, {"captions": ["birds chirp and a pop occurs before a man speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["zuua6-5goWw", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["sound, pop, bird", "background, motor, run"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["an insect buzzes around continuously", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["v25l1jef3JY", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["buzzes, continuously, insect", "People, motor, brakes"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is not a person?", "label": 0}, {"captions": ["a man speaks as a car is passing by", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sK4u5T8hW78", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["a, car, pass", "female, spraying, scream"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a duck quacks loudly and continuously", "multiple people speak and 
children yell while water gurgles"], "sample_ids": ["vh30P49Po6s", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["loud, continuous, quacks", "multiple, people, yell"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a crowd of people are talking and laughing"], "question": "which entity is quieter", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a woman speaks and then a man speaks"], "sample_ids": ["tDlysoZiA1I", "vbpKkWvfOu4"], "start_seconds": ["0", "560"], "properties": ["animal, grunt, chirp", "a, man, speaks"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking and a man is speaking"], "question": "which entity is a human speaking?", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zY3icUyMdh8", "wz7N8YRy74I"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "rooster, crow, background, men"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a bird?", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "a young woman speaks and laughs and an animal snorts"], "sample_ids": 
["uYT5gxnyMWM", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["female, spraying, scream", "a woman, laughs, animal"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a woman speaking and spraying?", "label": 0}, {"captions": ["a door opens and closes", "an engine runs loudly"], "sample_ids": ["vBHyYJ8pL0", "vqZuVbG6-HI"], "start_seconds": ["2", "130"], "properties": ["open, close, door", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "water pouring and bubbling"], "sample_ids": ["uWPRNLnpy7Y", "uyRfq-jKPpo"], "start_seconds": ["10", "50"], "properties": ["accelerate, laugh, vehicle", "water, bubbles, pouring"], "captions_pred_video": ["is taken from a car driving down the street", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], 
"captions_pred_audio": ["a car accelerates and revs its engine ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "an infant crying frantically"], "sample_ids": ["vXlk0lIQBFo", "zwOBqeFTgiU"], "start_seconds": ["470", "30"], "properties": ["wind, talk, vocalize", "cry, infant, frantically"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "of the baby crying in the car seat"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a man speaks as a car is passing by"], "sample_ids": ["tDVADusiIoc", "sK4u5T8hW78"], "start_seconds": ["60", "30"], "properties": ["water, radio, man", "a, car, pass"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "an airplane engine runs"], "sample_ids": ["zFjIWfSD-4", "yVPZ2MNWpms"], "start_seconds": ["410", "0"], "properties": ["People, motor, brakes", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], 
"captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a car is driving by on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "water is sprayed across a hard surface"], "sample_ids": ["siJFXfGWgDk", "sQwlkXjQabo"], "start_seconds": ["50", "10"], "properties": ["a, bird, vehicle", "water, spray, surface"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "vehicles pass by on a roadway"], "sample_ids": ["sapQIQUhFc", "tgbONvsP47Y"], "start_seconds": ["280", "0"], "properties": ["liquid, flow, distance", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a car is driving on the road "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "birds chirp quietly and an adult man speaks"], "sample_ids": ["y8dSeubCNI", "zuua6-5goWw"], "start_seconds": ["4", "30"], "properties": ["engine revving, people speaking, motorcycle", "birds, chirp, quiet, man, speaks"], "captions_pred_video": [null, "in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot"], "captions_pred_audio": ["an engine revving and people talking in the background", "birds are chirping and a man is speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "paper is crumpling consistently"], "sample_ids": ["wjsXBsc7M40", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a baby laughs and a woman speaks", "paper is crumpled and crinkled"], "question": "which entity is more likely to be a video", "label": 0}, {"captions": ["food is frying and sizzles", "people speak as gunfire rings out"], "sample_ids": ["zNRChLjqcU", "wqTCwqVRDlk"], "start_seconds": ["220", "80"], "properties": ["food is frying, sizzles, food", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sZvwOuuPGP0", "sLUnaPT5gM8"], "start_seconds": ["50", "0"], "properties": ["engine, diesel, truck", "loud, laughter, intermittent"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a medium engine 
is running ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is intermittent", "label": 1}, {"captions": ["scraping and female speech with distant music", "a man talks while a clock does ticktock"], "sample_ids": ["yHeVV-xeOxQ", "spYNpeN7rPY"], "start_seconds": ["130", "1"], "properties": ["female, speech, music", "a clock, ticktock, man"], "captions_pred_video": ["of a girl milking a goat's udder", "in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a man is speaking and breathing with background noise "], "question": "which entity is a clock?", "label": 1}, {"captions": ["rustling with distant murmuring", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wnNNcxAPwGQ", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["sound, distance, rustling", "stream, water, flow"], "captions_pred_video": ["footage of a yellow truck doing a burnout on 
a race track", "footage is blurry and out of focus"], "captions_pred_audio": ["a crowd of people are talking and laughing while a skateboard rolls by ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a goat screams and people speak in the background", "speaking following by laughing and clapping"], "sample_ids": ["xC8kbrKJmco", "u2f5NpsoHBg"], "start_seconds": ["0", "30"], "properties": ["background, goat, scream", "person, laugh, clap"], "captions_pred_video": [null, "is being projected on a screen at the front of the stage"], "captions_pred_audio": ["a goat is bleating ", "a woman is speaking and a crowd is clapping"], "question": "which entity is more likely to be a joke", "label": 1}, {"captions": ["a dark barks and whimpers", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sYj4hpDUZDQ", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["barks, whimpers, dark", "music, gunfire, explosion"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a dog barks and a cat meows", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zliInBdC98Y", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["a, baby, cries, wails", "applause, audience, yells"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["an audience 
gives applause as a man yells and a group sings", "water is sprayed across a hard surface"], "sample_ids": ["tdWhHV3X25Q", "sQwlkXjQabo"], "start_seconds": ["60", "10"], "properties": ["applause, audience, yells", "water, spray, surface"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["sawing of wood and rustling with leaves blowing in the distance", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["uiItxDsDMFI", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["sound, distance, leaves", "music, gunfire, explosion"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a saw is being used with background noise ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a woman speaks happily and an animal chirps"], "sample_ids": ["tDVADusiIoc", "uWAAAL4CIoc"], "start_seconds": ["60", "0"], "properties": ["wind, radio, waves", "a woman, chirps, animal"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking and a dog is barking "], "question": "which entity is more calm", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "water flows and trickles"], "sample_ids": ["sQwlkXjQabo", "tB7hWb9gTuQ"], "start_seconds": ["10", "30"], "properties": ["liquid, surface, spray", "water, flow, trickle"], 
"captions_pred_video": ["a close-up of a red car with water droplets on the hood", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["spraying followed by silence", "water is splashing and gurgling"], "question": "which entity is flowing", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "a man speaks followed by another man speaking outside"], "sample_ids": ["wudZTNBtVqc", "viuTg1M-dqg"], "start_seconds": ["60", "30"], "properties": ["accelerates, engine, wind", "two men, speak, follow"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker?", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "some tunes played by whistling"], "sample_ids": ["tEE3MpBt1sg", "u6BnG6YZqJ4"], "start_seconds": ["50", "0"], "properties": ["drill, something, laugh", "tune, play, whistling"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sTpirNYo8vQ", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["a, tone, fast", "engine, laugh, loud"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a jet engine roars "], "question": "which entity is 
louder", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a wooden clack accompanies nearby chirping birds"], "sample_ids": ["vbZ-0lGPneg", "yeFvk9x0wWI"], "start_seconds": ["30", "30"], "properties": ["a woman, a television program, a bird", "clack, bird, chirp"], "captions_pred_video": ["of a man holding a baby duck in his hands", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "birds chirp in the background as a car drives by "], "question": "which entity has more birds", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tK4VlLsNxak", "vYkA3cfXp5Q"], "start_seconds": ["120", "30"], "properties": ["a, dial, telephone", "engine, accelerate, idle"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["w1mlz3Pe4fU", "zl9Dqx-j7q4"], "start_seconds": ["300", "6"], "properties": ["vocalize, chirp, continuously", "engine, laugh, loud"], "captions_pred_video": ["of a bird in a cage", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and singing", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "water quietly rushes by while birds chirp in the background"], "sample_ids": ["vK93VuO0yNc", "sYITalLZjj4"], "start_seconds": ["30", "30"], "properties": ["male voice, bus, rumble", "water, rushes, background, birds"], 
"captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "two ducks are swimming in the water near each other"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "wind blows and birds chirp"], "question": "which entity is quieter", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "pigeons vocalize and birds chirp"], "sample_ids": ["tDlysoZiA1I", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["animal, grunts, chirps", "vocalize, bird, chirp"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "of the pigeon in the cage"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["an infant crying as a woman laughs", "a car speeding up in the distance"], "sample_ids": ["xhmRY9yhC7c", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["a, laugh, infant", "distance, car, speed"], "captions_pred_video": ["of a baby crying in a baby bouncer", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a man talks as several small engines run", "water splashes and a motorboat passes as people yell"], "sample_ids": ["u9A6VZQCZpU", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["a, man, talk", "water, splashes, motorboat"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about water?", "label": 1}, {"captions": ["an engine runs and a man speaks", "a vehicle engine accelerating then running on idle"], "sample_ids": ["yT5WfYMRr-U", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], 
"properties": ["engine, run, man", "engine, accelerate, idle"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "an engine is idling"], "question": "which entity is a vehicle engine?", "label": 1}, {"captions": ["a beep occurs briefly", "a man speaks followed by another man speaking outside"], "sample_ids": ["xtWeJ56-U-g", "viuTg1M-dqg"], "start_seconds": ["20", "30"], "properties": ["beep, occur, briefly", "two men, speak, follow"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "three men talk while wind blows and some liquid flows"], "sample_ids": ["su6FAOcOA8c", "vJ7JPEFhyLA"], "start_seconds": ["4", "16"], "properties": ["engine, idle, woman", "three men, wind, flow"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a bus engine?", "label": 0}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "small dogs yip and bark sharply"], "sample_ids": ["sjlVMgdGSK0", 
"v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["car, revving, loudly", "bark, yip, sharply"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a speedboat passes quickly on the water"], "sample_ids": ["vlJS7LN2XyM", "tjmoSi330GM"], "start_seconds": ["30", "23"], "properties": ["background, clocks, ticking", "speed, water, boat"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["a ticktock of a clock", "a motorboat speeds through water with wind noise "], "question": "which is moving faster", "label": 1}, {"captions": ["birds chirp and wind blows", "paper is crumpling consistently"], "sample_ids": ["sxIvBMSavMQ", "v5cSxLaHADY"], "start_seconds": ["210", "0"], "properties": ["birds, chirp, wind", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 
101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a man speaks as a car is passing by"], "sample_ids": ["uPDn2BFTHk", "sK4u5T8hW78"], "start_seconds": ["140", "30"], "properties": ["lady, laugh, baby", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["xKB8O8LTs6s", "tiDFTC-5vU"], "start_seconds": ["70", "30"], "properties": ["music, gunshots, explosion", "male, duck, laugh"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking and ducks are quacking"], "question": "which entity is a comedy", "label": 1}, {"captions": ["a man speaks as crickets sing", "waves crash against a shoreline and people speak"], "sample_ids": ["ryFDPxgDOGc", "yFB25fqfU8I"], "start_seconds": ["570", "300"], "properties": ["a, crickets, sing", "wave, crash, shoreline"], "captions_pred_video": ["a 
group of people dressed in camouflage and hunting gear in the dark", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be in a desert?", "label": 0}, {"captions": ["a vehicle engine runs and someone speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zF8yoL0rkbI", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["engine, run, someone", "rooster, crow, background, men"], "captions_pred_video": ["footage of the traffic on the street at night", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a toilet flushes and a female speaks"], "sample_ids": ["zTLVJCo4WEE", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["a, crickets, sing", "female, flushes, toilet"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a toilet flushes and a man speaks"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "birds chirp as a train approaches"], "sample_ids": ["wqADXCzngMw", "xM4joTqDVp4"], "start_seconds": ["340", "160"], "properties": ["engine, idle, man", "bird, chirp, train"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", 
"birds are chirping and a train is moving "], "question": "which entity is a train", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "a person snores loudly multiple times at a close distance"], "sample_ids": ["w-4gHptFNuU", "sSMl2vc3ek"], "start_seconds": ["21", "20"], "properties": ["engine revs, accelerates, bump", "loud, multiple, distance"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "a duck quacks continuously"], "sample_ids": ["uzQnlJXBbOM", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["ringing, beep, stop", "quacks, continuously, duck"], "captions_pred_video": ["footage of a person using a cell phone on a table", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a telephone rings and a man speaks", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "a machine beeps continuously"], "sample_ids": ["uYT5gxnyMWM", "y682ml90jGw"], "start_seconds": ["50", "11"], "properties": ["person, spray, yell", "beeps, machine, continuously"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a stream of water runs briefly"], "sample_ids": ["wnpJndXuxLc", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["blows, vehicle, train", "stream, water, run"], "captions_pred_video": ["footage of the train coming down the 
tracks on a snowy day", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "a infant makes noise and is excited"], "sample_ids": ["tZGN5a7ybxo", "wIJK3-5y0kA"], "start_seconds": ["60", "30"], "properties": ["ring, train, horn", "noise, excited, infant"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "an infant crying frantically"], "sample_ids": ["yks4cLgIDMc", "zwOBqeFTgiU"], "start_seconds": ["170", "30"], "properties": ["background, speaking, child", "cry, infant, frantically"], "captions_pred_video": ["footage of two kids wrestling on the floor", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking and a child is crying", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vddP56-ogds", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["water, splash, person, laugh", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "a man speaks as a car is passing by"], 
"sample_ids": ["rqu8iB22IY", "sK4u5T8hW78"], "start_seconds": ["5", "30"], "properties": ["sound, repeats, laugh", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["wTideSjRFS0", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["food, sizzle, woman", "harsh, wind, blows"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking with wind noise in the background "], "question": "which entity is about a woman speaking and food sizzling while frying?", "label": 0}, {"captions": ["water splashes and a motorboat passes as people yell", "winds blows roughly as a vehicle races past"], "sample_ids": ["w5W5Kqtc8E", "xjvTpk2Zpr8"], "start_seconds": ["100", "70"], "properties": ["water, splashes, motorboat", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["a man talks as 
several small engines run", "a man speaks as a car is passing by"], "sample_ids": ["u9A6VZQCZpU", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["zALy31PjDl0", "vbZ-0lGPneg"], "start_seconds": ["21", "30"], "properties": ["a man, a vehicle, a horn", "a woman, a television program, a bird"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a horn rings out as a machine runs by"], "sample_ids": ["wz7N8YRy74I", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["rooster, crow, background, people", "a, horn, run"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing 
", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["v0x1odnXtP0", "tDlysoZiA1I"], "start_seconds": ["210", "0"], "properties": ["keyboard, type, computer", "animal, grunts, chirps"], "captions_pred_video": ["how to make money on youtube in spanish", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a person is typing on a keyboard", "birds are chirping and a rooster is crowing "], "question": "which entity is not a person?", "label": 1}, {"captions": ["water pouring and bubbling", "a car speeding up in the distance"], "sample_ids": ["uyRfq-jKPpo", "u0TrcHhkPQ"], "start_seconds": ["50", "20"], "properties": ["water, bubbles, pouring", "distance, car, speed"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", null], "captions_pred_audio": ["water is running from a faucet", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["several beeps are followed by a hit and a woman talking", "a machine clanks and thumps and a male speaks"], "sample_ids": ["w34HjHr6gAY", "sWZzXuWYY"], 
"start_seconds": ["30", "420"], "properties": ["beeps, hit, woman", "male, clanks, thumps"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a sewing machine runs and a man speaks"], "question": "which entity has a male speaking?", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "loud ringing of a telephone stops followed by a man speaking and a digital beep"], "sample_ids": ["vb1fPSDI4c", "uzQnlJXBbOM"], "start_seconds": ["30", "50"], "properties": ["multiple, people, yell", "ringing, beep, stop"], "captions_pred_video": [null, "footage of a person using a cell phone on a table"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a telephone rings and a man speaks"], "question": "which entity is quieter", "label": 1}, {"captions": ["a goat bleats as a person speaks", "a machine beeps continuously"], "sample_ids": ["tPJvjq9QePY", "y682ml90jGw"], "start_seconds": ["40", "11"], "properties": ["bleats, person, speak", "beeps, machine, continuously"], "captions_pred_video": ["a dog and a sheep in a barn", null], "captions_pred_audio": ["a baby cries and a man speaks", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a clock ticktocks", "pigeons vocalize and birds chirp"], "sample_ids": ["v-g-j2uTByM", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["ticktocks, clock, ticktocks", "vocalize, 
bird, chirp"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "of the pigeon in the cage"], "captions_pred_audio": ["a clock is ticking loudly", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["xOZfdgAgJ9o", "y2bVZ7rz-5M"], "start_seconds": ["40", "280"], "properties": ["woman, whimpering, speaking", "motor noise, horn, siren"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["some tunes played by whistling", "birds twitter and chirp and clatter"], "sample_ids": ["u6BnG6YZqJ4", "yeFvk9x0wWI"], "start_seconds": ["0", "30"], "properties": ["tune, play, whistling", "chirp, twitter, clatter"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a person whistling a song", "birds chirp in the background as a car drives by "], "question": "which entity is not a musical instrument", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "wind blows and women speak as livestock vocalizes"], "sample_ids": ["wnpJndXuxLc", "vXlk0lIQBFo"], "start_seconds": ["50", "470"], "properties": ["beeps, loud, whistle", "wind, speak, vocalize"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "- a woman and two donkeys in a fenced in area"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the 
background ", "wind chimes are ringing and people are speaking and laughing "], "question": "which entity is not a series of light horn beeps followed by a loud steam whistle?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "an airplane engine runs"], "sample_ids": ["tOSWIURC-4", "yVPZ2MNWpms"], "start_seconds": ["0", "0"], "properties": ["engine, work, nearby", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a lawn mower is running ", "a car is driving by on the road "], "question": "which entity has a moving engine", "label": 1}, {"captions": ["an electronic device bleeps once", "a woman and man speak while food is frying"], "sample_ids": ["tHJ6JSa8Y4", "zk-xJGQU8-4"], "start_seconds": ["0", "130"], "properties": ["bleeps, electronic, device", "food, man, woman"], "captions_pred_video": [null, "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["a clock is ticking and beeping", "a woman is speaking while dishes are clanging and music is playing in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "wind blows as people chatter quietly"], "sample_ids": ["w34HjHr6gAY", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["beeps, squawk, child speaking", "wind, chatter, people"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "footage is blurry and out of 
focus"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a machine beeps continuously"], "sample_ids": ["zk-xJGQU8-4", "y682ml90jGw"], "start_seconds": ["130", "11"], "properties": ["food, man, woman", "beeps, machine, continuously"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", null], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["water running down a sink while a man is talking", "vehicles pass by on a roadway"], "sample_ids": ["vSeGhaZt-aI", "tgbONvsP47Y"], "start_seconds": ["50", "0"], "properties": ["water, sink, talk", "pass, vehicle, roadway"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaking with light rustling", "a man speaks as a car is passing by"], "sample_ids": ["zOZleIRqZm4", "sK4u5T8hW78"], "start_seconds": ["80", "30"], "properties": ["light, rustling, man", "a, car, pass"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": 
["a man is speaking with crickets chirping in the background", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more likely to be in a city", "label": 1}, {"captions": ["some people speak", "a woman talking as an infant is crying"], "sample_ids": ["vbZ-0lGPneg", "tMbMDvT50j8"], "start_seconds": ["30", "12"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "a, talk, infant"], "captions_pred_video": ["of a man holding a baby duck in his hands", "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a baby cries and a woman speaks"], "question": "which entity is about a woman talking to an infant?", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "wind blowing followed by a zoom"], "sample_ids": ["xO-Q2BlIIPU", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["two men, exclamation, speak", "wind, blow, zoom"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a person whistles a meandering tune", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["uFoga8sHpiw", "wqZ135Ssz0"], "start_seconds": ["90", "60"], "properties": ["person, tune, whistle", "two men, woman, birds"], "captions_pred_video": ["footage of a bird in a cage", null], "captions_pred_audio": ["a person whistles a song", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["several ducks are quacking and squawking", "wind blowing followed by a 
zoom"], "sample_ids": ["wfHeoPDLMaM", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["quacking, squawking, ducks", "wind, blow, zoom"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["ducks are quacking", "wind blows and a chainsaw cuts through wood "], "question": "which entity is silent", "label": 1}, {"captions": ["a duck quacks several times", "a man speaks as a car is passing by"], "sample_ids": ["vh30P49Po6s", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["quacks, duck, several", "a, car, pass"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a duck is quacking loudly", "a man is 
speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["a person speaks over rustling leaves", "a man speaks over intermittent keyboard taps"], "sample_ids": ["zOZleIRqZm4", "tw76HGONaKg"], "start_seconds": ["80", "570"], "properties": ["rustling, leaves, person", "audio, man, keyboard"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man speaks and types on a computer keyboard "], "question": "which entity is a person speaking over?", "label": 0}, {"captions": ["a goat bleats and someone makes a calling noise", "small dogs yip and bark sharply"], "sample_ids": ["vlS6YMeWAPo", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["noise, bleat, call", "bark, yip, sharply"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a goat bleats and birds chirp", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, 
{"captions": ["someone snores nearby", "a child speaks in closed space"], "sample_ids": ["spJCm8tD9Zo", "yW6FWLSLkx4"], "start_seconds": ["90", "40"], "properties": ["someone snores, nearby, someone", "child, space, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a drill runs and two people laugh", "water splashes as an animal walks through"], "sample_ids": ["tEE3MpBt1sg", "w1ir-sZ3Im8"], "start_seconds": ["50", "90"], "properties": ["two people, laugh, drill", "animal, water, splashes"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wnpJndXuxLc", "tdWhHV3X25Q"], "start_seconds": ["50", "60"], "properties": ["blows, vehicle, train", "applause, audience, yells"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a person", "label": 1}, {"captions": ["a person is burping while a girl speaks", "a motorcycle engine works nearby"], "sample_ids": ["vdoxuJn9lTc", "tOSWIURC-4"], "start_seconds": ["40", "0"], "properties": ["person, burp, 
girl", "engine, work, nearby"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", null], "captions_pred_audio": ["a child speaks followed by a burp", "a lawn mower is running "], "question": "which entity is working", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wTideSjRFS0", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["food, sizzle, woman", "multiple, people, yell"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a crowd of people are talking and laughing"], "question": "which entity is more active", "label": 1}, {"captions": ["an aircraft engine runs", "a toilet flushes and a female speaks"], "sample_ids": ["yLCORCnd35Q", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["engine, aircraft, runs", "female, flushes, toilet"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "footage is blurry and out of focus"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a toilet flushes and a man speaks"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "people cheer as a vehicle engine revs"], "sample_ids": ["zcDwZ6W7E3E", "xjhAnI2q6hM"], "start_seconds": ["180", "6"], "properties": ["a, man, speak", "engine revs, vehicle, people"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", 
"label": 1}, {"captions": ["a male speaks over some small clicks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["uXxVebHsGZ8", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["male, clicks, speak", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "vehicles pass by on a roadway"], "sample_ids": ["sWZzXuWYY", "tgbONvsP47Y"], "start_seconds": ["420", "0"], "properties": ["male, clanks, thumps", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a car is driving on the road "], "question": "which entity is a video of vehicles passing by on a roadway?", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a piece of wood is being placed down and sawed"], "sample_ids": ["yYEVLuqEytU", "uiItxDsDMFI"], "start_seconds": ["40", "30"], "properties": ["animal, pig, background", "wood, piece, saw"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a saw is being used with background noise "], "question": "which entity is being cut", "label": 1}, {"captions": ["birds chirp as a train approaches", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xM4joTqDVp4", "yajyRTUQk3U"], "start_seconds": ["160", "400"], "properties": ["bird, chirp, train", "a woman, something, fried"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "- a woman cooking in the kitchen"], 
"captions_pred_audio": ["birds are chirping and a train is moving ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a child speaks in closed space"], "sample_ids": ["vcmWSmvti8", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["music, man, fire", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["male speech with light ticking", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["xO-Q2BlIIPU", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["male, speech, ticking", "rustling, ducks, quack"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["ugHJF0hfYkg", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["loud, propeller, move", "a woman, laughs, animal"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is not moving", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a 
telephone rings followed by a woman talking"], "sample_ids": ["xKB8O8LTs6s", "tGcFnX0GHI"], "start_seconds": ["70", "0"], "properties": ["music, gunshots, explosion", "ring, talk, woman"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a woman speaks as she rubs two objects together"], "sample_ids": ["vh30P49Po6s", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["loud, continuous, quacks", "two objects, woman, speak"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is silent", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a man speaks and a rooster crows while men talk in the 
background"], "sample_ids": ["yPUYU6t3rwo", "wz7N8YRy74I"], "start_seconds": ["370", "30"], "properties": ["birds chirp, objects are moved around, birds", "rooster, crow, background, men"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["insects buzz and a man speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is about a rooster?", "label": 1}, {"captions": ["a man speaks while water drains", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vSeGhaZt-aI", "vbZ-0lGPneg"], "start_seconds": ["50", "30"], "properties": ["water, drain, man", "a woman, a television program, a bird"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking and a dog is whimpering"], "question": "which entity has more moving parts", "label": 1}, {"captions": ["people speak as gunfire rings out", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wqTCwqVRDlk", "tDVADusiIoc"], "start_seconds": ["80", "60"], "properties": ["gunfire, ring, speak", "water, radio, man"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sTpirNYo8vQ", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["a, tone, fast", "harsh, wind, blows"], 
"captions_pred_video": ["of a man taking a selfie on a bus", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking with wind noise in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "a woman speaks over sizzling noise"], "sample_ids": ["zCrAfDfv6-A", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["person, mouse, click", "noise, woman, speak"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person whistles a song", "a woman is speaking while food is frying in the background"], "question": "which entity is speaking over noise", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a car accelerates and wind blows"], "sample_ids": ["wTjoRj1se3U", "u0TrcHhkPQ"], "start_seconds": ["390", "20"], "properties": ["engine, run, people", "accelerates, wind, blows"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", null], "captions_pred_audio": ["a jet engine is running and people are talking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["an infant crying as a woman laughs", "a clock ticktocks"], "sample_ids": ["xhmRY9yhC7c", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["a, laugh, infant", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a baby crying in a baby bouncer", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a baby cries and a woman speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a boat travels through the waves as the wind blows 
loudly and a man speaks over a radio"], "sample_ids": ["wvKpEYswXO0", "tDVADusiIoc"], "start_seconds": ["150", "60"], "properties": ["sound, water, running", "wind, radio, waves"], "captions_pred_video": ["of the person preparing food in the kitchen", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a radio?", "label": 1}, {"captions": ["birds coo incessantly", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["yZrFNS7GFBQ", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["coo, bird, incessant", "rustling, ducks, quack"], "captions_pred_video": ["of the bird in the cage", null], "captions_pred_audio": ["an owl hoots in the background ", "a duck quacks and a woman speaks"], "question": "which entity is a bird", "label": 0}, {"captions": ["several ducks quack and cocks crow far away", "a man speaks while rain falls onto a hard surface"], "sample_ids": ["sNB8zxXneIM", "wqN6IIHw3po"], "start_seconds": ["20", "30"], "properties": ["several, quack, cocks", "rain, surface, fall"], "captions_pred_video": ["a group of geese in a cage", "in your own words what is happening in this screenshot? 
blood splattered all over the place"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a man is speaking and water is splashing"], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a door opens and birds chirp", "water flows and trickles"], "sample_ids": ["yeFvk9x0wWI", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["door, open, birds", "water, flow, trickle"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vJ7JPEFhyLA", "su6FAOcOA8c"], "start_seconds": ["16", "4"], "properties": ["three men, wind, flow", "engine, idle, woman"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a goat bleats and someone makes a calling noise"], "sample_ids": ["w6RTHR6AeAg", "vlS6YMeWAPo"], "start_seconds": ["40", "40"], "properties": ["call, owl, screech", "noise, bleat, call"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a goat bleats and birds chirp"], "question": "which entity is a call", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "plastic is tapped on while someone 
speaks"], "sample_ids": ["sWZzXuWYY", "wvKpEYswXO0"], "start_seconds": ["420", "150"], "properties": ["male, speech, banging", "plastic, tap, speak"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is quieter", "label": 1}, {"captions": ["a person is snoring while sleeping", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vJrjSeP17yE", "vb1fPSDI4c"], "start_seconds": ["40", "30"], "properties": ["a person is sleeping, snoring, person", "multiple, people, yell"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a woman speaks over sizzling noise", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["yajyRTUQk3U", "ziUT9IFTkjg"], "start_seconds": ["400", "10"], "properties": ["noise, woman, speak", "background, birds, rustling"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wvKpEYswXO0", "vJ7JPEFhyLA"], "start_seconds": ["150", "16"], "properties": ["sound, water, running", "three men, wind, flow"], "captions_pred_video": ["of the person preparing food in the kitchen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water 
running ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a woman speaking softly?", "label": 0}, {"captions": ["a man speaks over a running engine and blowing wind", "a female speaks softly as paper crinkles"], "sample_ids": ["ylpYOorfH4o", "xvDdE3zNf8Y"], "start_seconds": ["410", "120"], "properties": ["engine, running, wind", "a, female, speaks"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a woman speaks and crumples paper"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["motors runs briefly and tires screech", "a duck quacks continuously"], "sample_ids": ["yRx9txMcBl0", "vh30P49Po6s"], "start_seconds": ["40", "30"], "properties": ["motors, tires, screech", "quacks, continuously, duck"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 
5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["birds chirp as a bell rings", "water splashes as an animal walks through"], "sample_ids": ["ziUT9IFTkjg", "w1ir-sZ3Im8"], "start_seconds": ["10", "90"], "properties": ["chirp, bell, ring", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["wqUmIEzuNz4", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["frog, bird, vocalize", "engine, laugh, loud"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a cat meows and rustles", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["viuTg1M-dqg", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["two men, speak, follow", "engine, idle, woman"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a heavy rain falls endlessly", 
"someone sprays a liquid onto a hard surface making a hiss sound"], "sample_ids": ["wP8ZKrlx3oA", "zO-LSSY92ZM"], "start_seconds": ["40", "30"], "properties": ["heavy, rain, fall", "liquid, surface, sound"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'"], "captions_pred_audio": ["a heavy rain is falling on a surface", "steam is hissing and hissing"], "question": "which entity is not a liquid?", "label": 0}, {"captions": ["winds blows roughly as a vehicle races past", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["xjvTpk2Zpr8", "ziUT9IFTkjg"], "start_seconds": ["70", "10"], "properties": ["wind, blows, vehicle", "background, birds, rustling"], "captions_pred_video": ["footage of a dhl plane landing on the runway", null], "captions_pred_audio": ["a jet engine roars and wind blows ", "birds are chirping and a chime is ringing "], "question": "which entity is more calm", "label": 1}, {"captions": ["a child speaks in closed space", "people speak as gunfire rings out"], 
"sample_ids": ["yW6FWLSLkx4", "wqTCwqVRDlk"], "start_seconds": ["40", "80"], "properties": ["child, space, speak", "gunfire, ring, speak"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war zone", "label": 1}, {"captions": ["someone is burping continuously", "people speak as gunfire rings out"], "sample_ids": ["y636gklDioE", "wqTCwqVRDlk"], "start_seconds": ["20", "80"], "properties": ["burps, burps, burps", "gunfire, ring, speak"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a person burps loudly several times", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a stream of water runs briefly", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["x-PeY8Yb8M4", "w5W5Kqtc8E"], "start_seconds": ["300", "100"], "properties": ["stream, water, run", "wind, blow, vehicle"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", null], "captions_pred_audio": ["a car is driving on a wet road ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "goats bleat and metal clings"], "sample_ids": ["tw76HGONaKg", "tH17JPjDPnc"], "start_seconds": ["570", "260"], "properties": ["A, game, keyboard", "bleat, metal, clings"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of 
time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "feed of the goats eating hay in the barn"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a cow is mooing and mechanisms are ticking "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "an airplane engine spools and people speak"], "sample_ids": ["s6DESzUTGjY", "wTjoRj1se3U"], "start_seconds": ["16", "390"], "properties": ["wind, laugh, woman", "airplane, engine, spool"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "footage of a man playing with a remote control 
airplane in a field"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a jet engine is running and people are talking"], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a propeller rotates loudly and intensely"], "sample_ids": ["tK4VlLsNxak", "ugHJF0hfYkg"], "start_seconds": ["120", "10"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "loud, intense, propeller"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a man speaks followed by another man speaking outside"], "sample_ids": ["wEBlkGWVWwE", "viuTg1M-dqg"], "start_seconds": ["260", "30"], "properties": ["a, babble, woman", "two men, speak, follow"], "captions_pred_video": ["shows a person writing on the whiteboard", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "a clock ticktocks"], "sample_ids": ["slZLHwNbbt4", "v-g-j2uTByM"], "start_seconds": ["300", "30"], "properties": ["a, horn, run", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": 
["plastic is tapped on while someone speaks", "a vehicle engine accelerates and wind blows"], "sample_ids": ["wvKpEYswXO0", "wudZTNBtVqc"], "start_seconds": ["150", "60"], "properties": ["plastic, tap, speak", "accelerates, engine, wind"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage is of a parking lot with cars parked in it"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a man speaks as a motor runs in the background"], "sample_ids": ["ugHJF0hfYkg", "xZepNM9qcRA"], "start_seconds": ["10", "30"], "properties": ["engine, running, continuously", "background, motor, run"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["uYT5gxnyMWM", "tiDFTC-5vU"], "start_seconds": ["50", "30"], "properties": ["person, spray, yell", "male, duck, laugh"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["children speak as a female ask them questions", "some men converse over an engine running"], "sample_ids": ["wEBlkGWVWwE", "sCiy7QS1U"], "start_seconds": ["260", "300"], "properties": ["female, speak, questions", "men, converse, engine"], "captions_pred_video": ["shows a person writing on the 
whiteboard", null], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation between two people?", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "an infant crying as a woman laughs"], "sample_ids": ["vJ7JPEFhyLA", "xhmRY9yhC7c"], "start_seconds": ["16", "20"], "properties": ["three men, wind, flow", "a, laugh, infant"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as horns blow", "someone is typing on a computer keyboard"], "sample_ids": ["tHyNqRyK34A", "v0x1odnXtP0"], "start_seconds": ["24", "210"], "properties": ["a, man, speaks", "keyboard, type, computer"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["a horn honks and then loudly blares", "water splashes as an animal walks through"], "sample_ids": ["wnpJndXuxLc", "w1ir-sZ3Im8"], "start_seconds": ["50", "90"], "properties": ["horn, honk, loud", "animal, water, splashes"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "water splashes and gurgles as people speak"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a goat bleats as a person speaks", "a duck 
quacks continuously"], "sample_ids": ["tPJvjq9QePY", "vh30P49Po6s"], "start_seconds": ["40", "30"], "properties": ["bleats, person, speak", "quacks, continuously, duck"], "captions_pred_video": ["a dog and a sheep in a barn", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a baby cries and a man speaks", "a duck is quacking loudly"], "question": "which animal is speaking", "label": 0}, {"captions": ["a male speaks and another male speaks", "material crumbles into a microphone"], "sample_ids": ["viuTg1M-dqg", "vofpvUo6NAw"], "start_seconds": ["30", "220"], "properties": ["two males, speaking, male", "material, crumbles, microphone"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "person wrapping a toy car in a plastic bag"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "paper is being crumpled and crinkled"], "question": "which entity is not a person", "label": 1}, {"captions": ["a machine beeps continuously", "a duck quacks loudly and continuously"], "sample_ids": ["y682ml90jGw", "vh30P49Po6s"], "start_seconds": ["11", "30"], "properties": ["beeps, machine, continuously", "loud, continuous, quacks"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a beeping sound is being made ", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 0}, {"captions": ["a man speaks as a car is passing by", "a vehicle accelerates and squeals tires"], "sample_ids": ["sK4u5T8hW78", "yRx9txMcBl0"], "start_seconds": ["30", "40"], "properties": ["a, car, pass", "accelerates, tires, squeals"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a car is revving its engine and skidding "], "question": "which vehicle is accelerating?", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "an insect buzzes around continuously"], "sample_ids": ["soTOh3zYJfY", "v25l1jef3JY"], "start_seconds": ["40", "0"], "properties": ["vehicle, skid, tires", "buzzes, continuously, insect"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a fly is buzzing around a microphone "], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a car accelerates and wind blows"], "sample_ids": ["ugHJF0hfYkg", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["engine, running, continuously", "accelerates, wind, blows"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a 
man speaks while water trickles and flows", "several insects fly while two men talk"], "sample_ids": ["sapQIQUhFc", "s-T9OVOiMLo"], "start_seconds": ["280", "330"], "properties": ["water, trickles, flow", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more moving parts", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "a clock ticktocks"], "sample_ids": ["xyL9F5VrjkE", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["engine, run, wind", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "people applaud and hoot and chat quietly"], "sample_ids": ["vbr9mHKc8WM", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["noise, loudness, engine", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine is idling", "people are clapping and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "animals bleat and moo as a person speaks"], "sample_ids": ["vfYTJq7nU", "tPJvjq9QePY"], "start_seconds": ["130", "40"], "properties": ["ducks, quack, man", "animal, bleat, moo"], "captions_pred_video": [null, "a dog and a sheep in a barn"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a baby cries and a man speaks"], "question": "which animal is speaking", "label": 1}, {"captions": ["water runs 
into a sink while men speak", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vzceMbklWc", "wz7N8YRy74I"], "start_seconds": ["180", "30"], "properties": ["water, sink, run", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["water is running and a man is speaking", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more men", "label": 1}, {"captions": ["someone snores nearby", "pigeons vocalize and birds chirp"], "sample_ids": ["spJCm8tD9Zo", "uiS58TNyUiw"], "start_seconds": ["90", "430"], "properties": ["someone snores, nearby, someone", "vocalize, bird, chirp"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of the pigeon in the cage"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "someone is typing on a computer keyboard"], "sample_ids": ["s3cTDAj31g", "v0x1odnXtP0"], "start_seconds": ["80", "210"], "properties": ["man, talk, woman", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zCrAfDfv6-A", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["person, mouse, click", "gun, shoot, water"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a person whistles a song", "a gun is fired followed by splashing and a person 
sneezing"], "question": "which entity is more likely to cause harm", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "a machine beeps continuously"], "sample_ids": ["ukxt9I7eMMg", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["food, pan, cook", "beeps, machine, continuously"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a beeping sound is being made "], "question": "which entity is not a machine?", "label": 0}, {"captions": ["a woman talks while something is fried and objects are tapped", "people speak as gunfire rings out"], "sample_ids": ["yajyRTUQk3U", "wqTCwqVRDlk"], "start_seconds": ["400", "80"], "properties": ["a woman, something, fried", "gunfire, ring, speak"], "captions_pred_video": ["- a woman cooking in the kitchen", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["xjhAnI2q6hM", "su6FAOcOA8c"], "start_seconds": ["6", "4"], "properties": ["engine revs, vehicle, people", "engine, idle, woman"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a woman is speaking and a subway train is moving "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["some liquid flows while a woman laughs and man talks", "a speedboat passes quickly on the water"], "sample_ids": ["vddP56-ogds", "tjmoSi330GM"], "start_seconds": ["30", "23"], "properties": ["liquid, laughs, 
man", "speed, water, boat"], "captions_pred_video": [null, "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a motorboat speeds through water with wind noise "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "a woman speaks as she rubs two objects together"], "sample_ids": ["zl9Dqx-j7q4", "vzxHnu-SFEw"], "start_seconds": ["6", "80"], "properties": ["engine, laugh, loud", "two objects, woman, speak"], "captions_pred_video": ["footage of a man driving a car in the dark", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a jet engine roars ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "wind blows as people chatter quietly"], "sample_ids": ["voJh2gJxXhA", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["music, frog, croak", "wind, chatter, people"], 
"captions_pred_video": ["a frog on a black background with a red diamond in the center", "footage is blurry and out of focus"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["male speech with light ticking", "a duck quacks continuously"], "sample_ids": ["xO-Q2BlIIPU", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["male, speech, ticking", "quacks, continuously, duck"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["food is frying then a woman speaks", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["ukxt9I7eMMg", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["food, woman, speak", "male, duck, laugh"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking and ducks are quacking"], "question": "which entity is about a duck?", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "people applaud and hoot and chat quietly"], "sample_ids": ["sofxkNWaP0s", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["wind, engine, louder", "people, applaud, hoot"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", null], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, 
{"captions": ["a speedboat passes quickly on the water", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tjmoSi330GM", "tiDFTC-5vU"], "start_seconds": ["23", "30"], "properties": ["speed, water, boat", "male, duck, laugh"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", null], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "a person snores loudly multiple times at a close distance"], "sample_ids": ["y8dSeubCNI", "sSMl2vc3ek"], "start_seconds": ["4", "20"], "properties": ["engine revving, people speaking, motorcycle", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine revving and people talking in the background", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["t97k0cejSQE", "uEU-Hg5MTN8"], "start_seconds": ["250", "27"], "properties": ["bird, chirp, insect", "a woman, laughs, animal"], "captions_pred_video": ["a bee on a purple thistle flower", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be in a forest", "label": 0}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "a infant makes noise and is excited"], "sample_ids": ["w2JXXIAdUdg", "wIJK3-5y0kA"], "start_seconds": ["10", "30"], "properties": ["snoring, distance, person", "noise, excited, infant"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "of a baby playing with a cat 
in a dark room"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["an airplane engine spools and people speak", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["wTjoRj1se3U", "zFjIWfSD-4"], "start_seconds": ["390", "410"], "properties": ["airplane, engine, spool", "People, motor, brakes"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", null], "captions_pred_audio": ["a jet engine is running and people are talking", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a motor?", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "children cheer as a man speaks then an audience screams"], "sample_ids": ["xzKKf9bKNUo", "vJvryTwuAV8"], "start_seconds": ["10", "16"], "properties": ["background, noise, snoring", "audience, cheer, man"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and a crowd is shouting and whooping "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "some tunes played by whistling"], "sample_ids": ["yYEVLuqEytU", "u6BnG6YZqJ4"], "start_seconds": ["40", "0"], "properties": ["animal, pig, background", "tune, play, whistling"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a person whistling a song"], "question": "which entity is a musical performance", 
"label": 1}, {"captions": ["continuous snoring", "a woman and man speak while food is frying"], "sample_ids": ["sLkeqCDJIyw", "zk-xJGQU8-4"], "start_seconds": ["120", "130"], "properties": ["loud, snoring, noise", "food, man, woman"], "captions_pred_video": [", what is the man doing on the couch? sleeping", "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking while dishes are clanging and music is playing in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "a motor runs steadily as a man speaks, then the motor revs twice"], "sample_ids": ["s3cTDAj31g", "ylpYOorfH4o"], "start_seconds": ["80", "410"], "properties": ["man, talk, woman", "motor, run, steady"], "captions_pred_video": [null, "for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a man is speaking and an engine is revving"], "question": "which entity is silent", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a man speaks as a car is passing by"], "sample_ids": ["yajyRTUQk3U", "sK4u5T8hW78"], "start_seconds": ["400", "30"], "properties": ["a woman, something, fried", "a, car, pass"], "captions_pred_video": ["- a woman cooking in the kitchen", "for 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "pigeons vocalize and birds chirp"], "sample_ids": ["vMf1dLD6Sng", "uiS58TNyUiw"], "start_seconds": ["6", "430"], "properties": ["frog, bird, vocalize", "vocalize, bird, chirp"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "of the pigeon in the cage"], "captions_pred_audio": ["a frog croaks loudly", "a man is speaking and a bee is buzzing"], "question": "which entity is a frog", "label": 0}, {"captions": ["a female speaks softly as paper crinkles", "a woman speaks as she rubs two objects together"], "sample_ids": ["xvDdE3zNf8Y", "vzxHnu-SFEw"], "start_seconds": ["120", "80"], "properties": ["a, female, speaks", "two objects, woman, speak"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman speaks and crumples paper", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is about a woman speaking?", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "a train horn blows as it passes by"], "sample_ids": ["vs65y4qmyBE", "zVacuqSb4LI"], "start_seconds": ["340", "30"], "properties": ["engine, run, man", "horn, blows, train"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "a child speaks in closed space"], "sample_ids": ["xfaoyyzw2WU", "yW6FWLSLkx4"], "start_seconds": ["180", "40"], "properties": ["loud, jet engine, 
roar", "child, space, speak"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a woman is speaking with background noise and breathing sounds "], "question": "which is louder", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["siJFXfGWgDk", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["man, woman, vehicle", "stream, water, flow"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a man speaks as a motor runs in the background"], "sample_ids": ["uEU-Hg5MTN8", "xZepNM9qcRA"], "start_seconds": ["27", "30"], "properties": ["animal, grunts, snorts", "background, motor, run"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a car speeding up in the distance", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["u0TrcHhkPQ", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["distance, car, speed", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a race car accelerates and 
revs its engine ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["water rushes by", "music plays and a woman speaks on a radio before gunshots are fired"], "sample_ids": ["x-PeY8Yb8M4", "xKB8O8LTs6s"], "start_seconds": ["300", "70"], "properties": ["water, rushes, by", "music, radio, gunshots"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a car is driving on a wet road ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a video of a woman speaking on a radio?", "label": 1}, {"captions": ["birds coo incessantly", "an infant crying as a woman laughs"], "sample_ids": ["yZrFNS7GFBQ", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["coo, bird, incessant", "a, laugh, infant"], "captions_pred_video": ["of the bird in the cage", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["an owl hoots in the background ", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "pigeons vocalize and birds chirp"], "sample_ids": ["w9lpbUn0hPc", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["male, wind, rustling", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a train horn blares as a train passes, then fades", "birds chirp and objects are moved around"], "sample_ids": ["zVacuqSb4LI", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], 
"properties": ["blares, fades, train", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "a car speeding up in the distance"], "sample_ids": ["yZp6xizR0yU", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["animal, bleat, cry", "distance, car, speed"], "captions_pred_video": ["footage of a woman feeding goats in a barn", null], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a rumble grows louder", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["y4MY9mp8-TA", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["loudness, increase, rumble", "motor noise, horn, siren"], "captions_pred_video": ["a helicopter flying in the sky", "footage of a parade of fire trucks driving down the street"], 
"captions_pred_audio": ["a helicopter flies overhead ", "a truck is honking its horn and a siren is blaring "], "question": "which is louder", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["su6FAOcOA8c", "zl9Dqx-j7q4"], "start_seconds": ["4", "6"], "properties": ["engine, idle, woman", "engine, laugh, loud"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a jet engine roars "], "question": "which entity is a man?", "label": 1}, {"captions": ["a person is whistling a tune", "paper is crumpling consistently"], "sample_ids": ["scYRUkrFLiQ", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["a, tune, whistle", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a person whistling a song", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["electronic beeps occur in a short series", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["y682ml90jGw", "yajyRTUQk3U"], "start_seconds": ["11", "400"], "properties": ["beeps, series, electronic", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a beeping sound is being made ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["zofjfKhqLk8", "w5W5Kqtc8E"], "start_seconds": ["10", 
"100"], "properties": ["background, metal, clings", "wind, blow, vehicle"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running?", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a stream of water flows as people talk and wind blows"], "sample_ids": ["u21-Z5gJCB8", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["background, voice, man", "stream, water, flow"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "plastic is tapped on while someone speaks"], "sample_ids": ["zl9Dqx-j7q4", "wvKpEYswXO0"], "start_seconds": ["6", "150"], "properties": ["motors rev, laugh, loudly", "plastic, tap, speak"], "captions_pred_video": ["footage of a man driving a car in the dark", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a jet engine roars ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sOa7g-44Dag", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["background, man, spray", "loud, laughter, intermittent"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a 
man is speaking and rubbing his hands together ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a person is snoring while sleeping", "a child speaks in closed space"], "sample_ids": ["vJrjSeP17yE", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["a person is sleeping, snoring, person", "child, space, speak"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "vehicles pass by on a roadway"], "sample_ids": ["vbZ-0lGPneg", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["a woman, a television program, a bird", "pass, vehicle, roadway"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a car is driving on the road "], "question": "which entity has more vehicles", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a child speaks in closed space"], "sample_ids": ["sWZzXuWYY", "yW6FWLSLkx4"], "start_seconds": ["420", "40"], "properties": ["male, clanks, thumps", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "a woman talks while something is fried and objects are 
tapped"], "sample_ids": ["xM4joTqDVp4", "yajyRTUQk3U"], "start_seconds": ["160", "400"], "properties": ["background, chirp, birds", "a woman, something, fried"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["viuTg1M-dqg", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["two men, speak, follow", "loud, jet engine, roar"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "people cheer as a vehicle engine revs"], "sample_ids": ["w6RTHR6AeAg", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["call, owl, screech", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "a car accelerates and wind blows"], "sample_ids": ["tGcFnX0GHI", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["ring, talk, woman", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", 
"a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a stream of water flows as people talk and wind blows"], "sample_ids": ["zTLVJCo4WEE", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["a, crickets, sing", "stream, water, flow"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "waves crash against a shoreline and people speak"], "sample_ids": ["w8uLijTqtlU", "yFB25fqfU8I"], "start_seconds": ["70", "300"], "properties": ["wind, microphone, noise", "wave, crash, shoreline"], "captions_pred_video": ["footage is blurry and shaky", "footage of a person surfing in the ocean"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "some men converse over an engine running"], "sample_ids": ["tQWGZLItBXk", "sCiy7QS1U"], "start_seconds": ["170", "300"], "properties": ["music, person, ding", "men, converse, engine"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a person speaking?", "label": 0}, {"captions": ["some tunes played by whistling", "wind blows as people chatter quietly"], "sample_ids": ["u6BnG6YZqJ4", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["tune, play, whistling", "wind, chatter, 
people"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "footage is blurry and out of focus"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vb1fPSDI4c", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["multiple, people, yell", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a crowd of people are talking and laughing", "a duck quacks and a woman speaks"], "question": "which entity has more people speaking?", "label": 0}, {"captions": ["small dogs growl, bark and yip.", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["sShpyu2l4YQ", "rqu8iB22IY"], "start_seconds": ["0", "5"], "properties": ["growl, bark, yip", "sound, repeats, laugh"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a dog barks and a man speaks while music plays "], "question": "which entity is more likely to be repeated", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "water pouring and bubbling"], "sample_ids": ["vZAw4apG0Es", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["background, tick, repeat", "water, bubbles, pouring"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to 
do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a clock is ticking and people are talking", "water is running from a faucet"], "question": "which entity is more likely to be a video of a man speaking?", "label": 0}, {"captions": ["birds chirp and pigeons vocalize while walking around", "several ducks quack and cocks crow far away"], "sample_ids": ["wIvYjuR3nrg", "sNB8zxXneIM"], "start_seconds": ["9", "20"], "properties": ["birds, pigeons, vocalize", "several, quack, cocks"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "a group of geese in a cage"], "captions_pred_audio": ["birds are chirping and cooing", "a rooster is crowing and wind is blowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a clock ticktocks"], "sample_ids": ["sLUnaPT5gM8", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["loud, laughter, intermittent", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["a male speaks and another male speaks", "a fly buzzes around loudly as birds chirp"], "sample_ids": ["viuTg1M-dqg", "uJV8NDaHqqk"], "start_seconds": ["30", "100"], "properties": ["two males, speaking, male", "loud, fly, chirp"], "captions_pred_video": 
["footage of water coming out of a hole in the ground", "a bee hive in a wooden box"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a swarm of bees buzzing around"], "question": "which entity is louder", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a man speaks as a motor runs in the background"], "sample_ids": ["vr8ZXjEBhMQ", "xZepNM9qcRA"], "start_seconds": ["150", "30"], "properties": ["wind, blow, zoom", "background, motor, run"], "captions_pred_video": ["is taken from a motorcycle's point of view", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a man speaking in the background?", "label": 1}, {"captions": ["two frogs croak at each other", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["zg0X6BnhOLQ", "zl9Dqx-j7q4"], "start_seconds": ["410", "6"], "properties": ["two frogs, croak, at each other", "engine, laugh, loud"], "captions_pred_video": ["footage of lightning in the sky at night", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a frog is croaking", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "a woman speaks happily and an animal chirps"], "sample_ids": ["xjhAnI2q6hM", "uWAAAL4CIoc"], "start_seconds": ["6", "0"], "properties": ["wind, blow, loudly", "a woman, chirps, animal"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", null], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["children speak and play together", "a power tool runs and touches a 
surface"], "sample_ids": ["yVVP8XvWJTo", "zfvPRf3chY"], "start_seconds": ["260", "290"], "properties": ["children, speak, play", "power tool, run, touch"], "captions_pred_video": ["footage of a playground at a school or daycare center", null], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a man is speaking while a power tool is being used "], "question": "which is not a person", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vZAw4apG0Es", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["background, tick, repeat", "a, scream, girl"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a clock is ticking and people are talking", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["leaves rustle while man speaks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zOZleIRqZm4", "uEU-Hg5MTN8"], "start_seconds": ["80", "27"], "properties": ["leaves, rustle, speak", "a woman, laughs, animal"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a woman is speaking and a baby is crying"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a muffled toilet flushes and the water drains", "an airplane engine spools and people speak"], "sample_ids": ["sfAvvZwdLCY", "wTjoRj1se3U"], "start_seconds": ["20", "390"], "properties": ["flushes, drains, water", "airplane, engine, spool"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a man playing with a remote control airplane in a field"], 
"captions_pred_audio": ["a toilet is flushed", "a jet engine is running and people are talking"], "question": "which entity is a machine", "label": 1}, {"captions": ["several insects fly while two men talk", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["s-T9OVOiMLo", "zFjIWfSD-4"], "start_seconds": ["330", "410"], "properties": ["several, fly, men", "People, motor, brakes"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", null], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "the rumbling of a bus followed by a soft male voice"], "sample_ids": ["u7C-AEBQM", "vK93VuO0yNc"], "start_seconds": ["30", "30"], "properties": ["ticks, rhythmic, quiet", "male voice, bus, rumble"], "captions_pred_video": [null, "footage is blurry due to the movement of the bus as it drives through the city at night"], "captions_pred_audio": ["a ticktock of a clock", "a car drives by with wind noise in the background "], "question": "which entity is quieter", "label": 0}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a toilet flushes and water drains"], "sample_ids": ["weDbePuc-Xc", "sfAvvZwdLCY"], "start_seconds": ["40", "20"], "properties": ["music, slaps, human", "water drains, flushes, water"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a female speaks softly as paper crinkles"], 
"sample_ids": ["zY3icUyMdh8", "xvDdE3zNf8Y"], "start_seconds": ["20", "120"], "properties": ["dog, bark, engine", "a, female, speaks"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a woman speaks and crumples paper"], "question": "which entity is speaking softly", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "people applaud and hoot and chat quietly"], "sample_ids": ["w-4gHptFNuU", "wwyfGO2J4"], "start_seconds": ["21", "90"], "properties": ["engine revs, accelerates, bump", "people, applaud, hoot"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be a video of a performance", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["yVumC9TGknc", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["humming, clock, birds", "airplane, boy, fly"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a series of beeps and chirps", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a man speaks as a car is passing by"], "sample_ids": ["sU53zg9Jp7s", "sK4u5T8hW78"], "start_seconds": ["380", "30"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "a, car, 
pass"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vBslzh7saPw", "vb1fPSDI4c"], "start_seconds": ["90", "30"], "properties": ["engine, roar, louder", "multiple, people, yell"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a crowd of people are talking and laughing"], "question": "which entity is quieter", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "two women and a man talk while a kid cries"], "sample_ids": ["ukg5L09Wpvo", "wyllXV6PjKo"], "start_seconds": ["150", "30"], "properties": ["a train, a horn, a bell", "a kid, talk, cry"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", null], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a woman speaks and a baby cries"], "question": "which entity has a kid?", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a horn rings out as a machine runs by"], "sample_ids": ["shmR4OZtzqA", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["man, engine, idle", "a, 
horn, run"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man speaks while a motor runs", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a man speaks as a car is passing by", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sK4u5T8hW78", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["a, car, pass", "men, talk, cars"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet 
holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["continuous snoring", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sLkeqCDJIyw", "uYT5gxnyMWM"], "start_seconds": ["120", "50"], "properties": ["loud, snoring, noise", "female, spraying, scream"], "captions_pred_video": [", what is the man doing on the couch? sleeping", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is louder", "label": 1}, {"captions": ["a toilet flushes and water drains", "a clock ticktocks in wind"], "sample_ids": ["sfAvvZwdLCY", "yVumC9TGknc"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "ticktocks, clock, wind"], "captions_pred_video": ["footage of the toilet in the bathroom", "game title screen of the game shadow of the colossus on sony playstation 2"], "captions_pred_audio": ["a toilet is flushed", "a series of beeps and chirps"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["a muffled toilet flushes and the water drains", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sfAvvZwdLCY", "vb1fPSDI4c"], "start_seconds": ["20", "30"], "properties": ["flushes, drains, water", "multiple, people, yell"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, 
{"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wRBHTgrbiwg", "vfYTJq7nU"], "start_seconds": ["50", "130"], "properties": ["birds, chirp, cooing", "rustling, ducks, quack"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a duck quacks and a woman speaks"], "question": "which entity is about birds?", "label": 0}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["ziUT9IFTkjg", "wqZ135Ssz0"], "start_seconds": ["10", "60"], "properties": ["background, birds, rustling", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more birds", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "water rushes by"], "sample_ids": ["xO-Q2BlIIPU", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["two men, exclamation, speak", "water, rushes, by"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a car is driving on a wet road "], "question": "which entity is more calm", "label": 0}, {"captions": ["a man speaks while video game music plays with some clicking", "a telephone rings followed by a woman talking"], "sample_ids": ["tw76HGONaKg", "tGcFnX0GHI"], "start_seconds": ["570", "0"], "properties": ["music, click, man", "ring, talk, woman"], 
"captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "someone whistles a tune"], "sample_ids": ["yI-KvObbDoY", "sIXTftIuUgw"], "start_seconds": ["260", "90"], "properties": ["sound, smack, wind", "someone, tune, whistle"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", null], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a clock ticktocks"], "sample_ids": ["zALy31PjDl0", "v-g-j2uTByM"], "start_seconds": ["21", "30"], "properties": ["a man, a vehicle, a horn", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking 
and a car horn is honking", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["white noise and birds chirping", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wRBHTgrbiwg", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["noise, white, chirping", "a woman, something, fried"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a woman is speaking while food is frying in the background"], "question": "which entity is a video", "label": 1}, {"captions": ["an insect buzzes around continuously", "wind loudly blowing while people speak in the background followed by a horn blowing"], "sample_ids": ["v25l1jef3JY", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["buzzes, continuously, insect", "wind, blow, loudly"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a truck is revving its engine and a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["food is frying and sizzles", "three men talk while wind blows and some liquid flows"], "sample_ids": ["zNRChLjqcU", "vJ7JPEFhyLA"], "start_seconds": ["220", "16"], "properties": ["food is frying, sizzles, food", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a person?", "label": 0}, {"captions": ["dogs bark as an engine runs and a person whistles", "an airplane engine spools and people speak"], "sample_ids": ["zY3icUyMdh8", "wTjoRj1se3U"], 
"start_seconds": ["20", "390"], "properties": ["dog, bark, engine", "airplane, engine, spool"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a jet engine is running and people are talking"], "question": "which entity is a machine", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "wind blows as people chatter quietly"], "sample_ids": ["tDVADusiIoc", "xBxDz0CFVn0"], "start_seconds": ["60", "30"], "properties": ["man, radio, blows", "wind, chatter, people"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["sapQIQUhFc", "w34HjHr6gAY"], "start_seconds": ["280", "30"], "properties": ["liquid, flow, distance", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a beep sounds followed by a child speaking"], "question": "which entity has a 
woman talking?", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "a infant makes noise and is excited"], "sample_ids": ["smDKStoHBJo", "wIJK3-5y0kA"], "start_seconds": ["0", "30"], "properties": ["a, talk, baby, cry", "noise, excited, infant"], "captions_pred_video": ["a man holding a crying baby in his arms", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a baby cries and a woman speaks"], "question": "which is a more active scene", "label": 1}, {"captions": ["a man speaking with light rustling", "dishes cling together then a man begins to speak"], "sample_ids": ["zOZleIRqZm4", "sQGXqGcwOTc"], "start_seconds": ["80", "3"], "properties": ["light, rustling, man", "cling, speak, dishes"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["birds chirp and a pop occurs before a man speaks", "water flows as men speak and yell"], "sample_ids": ["zuua6-5goWw", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["sound, pop, bird", "water, flow, men"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a 
man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["water quietly rushes by while birds chirp in the background", "winds blows roughly as a vehicle races past"], "sample_ids": ["sYITalLZjj4", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["water, rushes, background, birds", "wind, blows, vehicle"], "captions_pred_video": ["two ducks are swimming in the water near each other", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["wind blows and birds chirp", "a jet engine roars and wind blows "], "question": "which entity is more calm", "label": 0}, {"captions": ["ticking continues without interruption", "a horn honks followed by a loud continuous buzzing while men speak"], "sample_ids": ["v-g-j2uTByM", "wsHBIgzs9Fs"], "start_seconds": ["30", "50"], "properties": ["ticking, continuous, clock", "horn, continuous, buzzing"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "shows a motorcycle riding down a country road with a motorcycle in the foreground"], "captions_pred_audio": ["a clock is ticking loudly", "a car accelerates and revs its engine while a man speaks "], "question": "which entity is continuous", "label": 1}, {"captions": ["several insects fly while two men talk", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["s-T9OVOiMLo", "uYT5gxnyMWM"], "start_seconds": ["330", "50"], "properties": ["several, fly, men", "a, scream, girl"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a woman is speaking and a baby is crying"], "question": "which 
entity has more people", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xjhAnI2q6hM", "tDVADusiIoc"], "start_seconds": ["6", "60"], "properties": ["engine revs, vehicle, people", "water, radio, man"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a vehicle?", "label": 0}, {"captions": ["an animal growls followed by birds chirping", "people speak as gunfire rings out"], "sample_ids": ["y2ZBGpgbhHM", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["animal, growl, bird", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "vehicles pass by on a roadway"], "sample_ids": ["vSeGhaZt-aI", "tgbONvsP47Y"], "start_seconds": ["50", "0"], "properties": ["water, bubbles, speak", "pass, vehicle, roadway"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["an aircraft engine runs", "water is sprayed across a hard surface"], "sample_ids": ["yLCORCnd35Q", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["engine, aircraft, runs", "water, spray, surface"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow 
airport", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a telephone rings followed by a woman talking"], "sample_ids": ["uPDn2BFTHk", "tGcFnX0GHI"], "start_seconds": ["140", "0"], "properties": ["lady, laugh, baby", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation between a woman and a baby?", "label": 0}, {"captions": ["a clock ticktocks", "water is sprayed across a hard surface"], "sample_ids": ["v-g-j2uTByM", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["ticktocks, clock, ticktocks", "water, spray, surface"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a clock is ticking loudly", "spraying followed by silence"], "question": "which entity is not a clock?", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "people cheer as a vehicle engine revs"], "sample_ids": ["w34HjHr6gAY", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["beeps, squawk, child speaking", "engine revs, vehicle, people"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the 
wizard of oz 1995 the wizard of oz", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["humming of idling and revving engine with a man speaking", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wqADXCzngMw", "xKB8O8LTs6s"], "start_seconds": ["340", "70"], "properties": ["audio, humming, revving", "music, gunfire, explosion"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a toilet flushes and a female speaks"], "sample_ids": ["xjhAnI2q6hM", "yaln9y8I7ms"], "start_seconds": ["6", "230"], "properties": ["engine revs, vehicle, people", "female, flushes, toilet"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "footage is blurry and out of focus"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wjsXBsc7M40", "sSMl2vc3ek"], "start_seconds": ["10", "20"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "loud, multiple, distance"], "captions_pred_video": ["footage of the baby playing with a toothbrush", null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a person snoring loudly"], "question": "which entity is 
louder", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["yZp6xizR0yU", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["animal, bleat, cry", "applause, audience, yells"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "wind blows as people chatter quietly"], "sample_ids": ["xV7Mg1QucSc", "xBxDz0CFVn0"], "start_seconds": ["14", "30"], "properties": ["alarm, ticktocks, laughs", "wind, chatter, people"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "footage is blurry and out of focus"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["someone whistles a song", "a vehicles accelerate quickly and someone laughs"], "sample_ids": ["sIXTftIuUgw", "uWPRNLnpy7Y"], "start_seconds": ["90", "10"], "properties": ["someone, song, whistle", "accelerate, laugh, vehicle"], "captions_pred_video": [null, "is taken from a car driving down the street"], "captions_pred_audio": ["a person whistling a song", "a car accelerates and revs its engine "], "question": "which entity is more likely to be a song", "label": 0}, {"captions": ["a machine beeps continuously", "a woman speaks and then a man speaks"], "sample_ids": ["y682ml90jGw", "vbpKkWvfOu4"], "start_seconds": ["11", "560"], "properties": ["beeps, machine, continuously", "a, man, speaks"], "captions_pred_video": [null, "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a beeping sound is being made ", "a woman is speaking and a man is speaking"], "question": "which entity is a human speaking?", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vXlk0lIQBFo", "tDVADusiIoc"], "start_seconds": ["470", "60"], "properties": ["wind, talk, vocalize", "water, radio, man"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "multiple insects buzz over rustling wind"], "sample_ids": ["vzxHnu-SFEw", "tMJne1a4AFI"], "start_seconds": ["80", "0"], "properties": ["two objects, woman, speak", "wind, buzz, rustling"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "a swarm of bees on the ground"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a swarm of bees buzzing around"], "question": "which entity is moving", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wjsXBsc7M40", "xKB8O8LTs6s"], "start_seconds": ["10", "70"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "music, gunfire, explosion"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a baby laughs and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "a person snores loudly multiple times at a close distance"], "sample_ids": ["t69a8aRKhmc", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["a, b, c", "loud, multiple, distance"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vveS8HT7Uog", "uEU-Hg5MTN8"], "start_seconds": ["100", "27"], "properties": ["a man, objects, speak", "animal, grunts, snorts"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing 
", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a frog croaks as other frogs croak in the background"], "sample_ids": ["ugHJF0hfYkg", "yswmmRZFItk"], "start_seconds": ["10", "0"], "properties": ["loud, propeller, move", "background, frog, croak"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a close up of a frog in the water"], "captions_pred_audio": ["a helicopter is flying overhead ", "a frog is croaking"], "question": "which is quieter", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a woman speaks happily and an animal chirps"], "sample_ids": ["zkKdxzNC97Y", "uWAAAL4CIoc"], "start_seconds": ["27", "0"], "properties": ["loud, bang, noise", "a woman, chirps, animal"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "material crumbles into a microphone"], "sample_ids": ["vYkA3cfXp5Q", "vofpvUo6NAw"], "start_seconds": ["30", "220"], "properties": ["speed, idle, accelerate", "material, crumbles, microphone"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "person wrapping a toy car in a plastic bag"], "captions_pred_audio": ["an engine is idling", "paper is being crumpled and crinkled"], "question": "which entity is more likely to be a video of a car engine?", "label": 0}, {"captions": ["a man speaks as he moves silverware in a bowl", "an infant crying frantically"], "sample_ids": ["x6ijhqRY38s", "zwOBqeFTgiU"], "start_seconds": ["250", "30"], "properties": ["bowl, silverware, man", "cry, infant, frantically"], "captions_pred_video": ["a chef preparing a dish with a 
bottle of wine and a plate of food on a table", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["v0x1odnXtP0", "yajyRTUQk3U"], "start_seconds": ["210", "400"], "properties": ["keyboard, type, computer", "a woman, something, fried"], "captions_pred_video": ["how to make money on youtube in spanish", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman is speaking while food is frying in the background"], "question": "which entity is a demonstration of cooking?", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "children cheer as a man speaks then an audience screams"], "sample_ids": ["wnpJndXuxLc", "vJvryTwuAV8"], "start_seconds": ["50", "16"], "properties": ["beeps, loud, whistle", "audience, cheer, man"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking and a crowd is shouting and whooping "], "question": "which entity is a person speaking to an audience?", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zgUgkpk78xU", "uEU-Hg5MTN8"], "start_seconds": ["70", "27"], "properties": ["horn, bells, ring", "a woman, laughs, animal"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 
60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["a woman speaks and then a man speaks", "an infant crying as a woman laughs"], "sample_ids": ["vbpKkWvfOu4", "xhmRY9yhC7c"], "start_seconds": ["560", "20"], "properties": ["a, man, speaks", "a, laugh, infant"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a baby cries and a woman speaks"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["material crumbles into a microphone", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vofpvUo6NAw", "zl9Dqx-j7q4"], "start_seconds": ["220", "6"], "properties": ["material, crumbles, microphone", "engine, laugh, loud"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "footage of a man driving a car in the dark"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "a vehicle engine accelerating then running on idle"], "sample_ids": ["w0xsN8X18Y", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["rain, thunder, surface", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking while a 
motorboat is moving in the background ", "an engine is idling"], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "an infant crying frantically"], "sample_ids": ["uRExseg-0XI", "zwOBqeFTgiU"], "start_seconds": ["210", "30"], "properties": ["woman, man, water", "cry, infant, frantically"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "water flows and trickles"], "sample_ids": ["t8CV69hcvF0", "tB7hWb9gTuQ"], "start_seconds": ["210", "30"], "properties": ["person, sneeze, follow", "water, flow, trickle"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman sneezes and speaks", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["someone is snoring while sleeping", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["ujMt0-D-x2k", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["snore, sleep, someone", "engine, idle, woman"], "captions_pred_video": ["of the dog playing with a toy on the floor", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a subway train is moving "], "question": "which entity is a person", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sLUnaPT5gM8", "xBxDz0CFVn0"], 
"start_seconds": ["0", "30"], "properties": ["loud, laughter, intermittent", "stream, water, flow"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "people speak as gunfire rings out"], "sample_ids": ["vZAw4apG0Es", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["background, tick, repeat", "gunfire, ring, speak"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["a child speaks", "a child speaks"], "sample_ids": ["yW6FWLSLkx4", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["a, child, speaks", "a, child, speaks"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a woman is speaking with background noise and breathing sounds "], "question": "which child speaks", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a toilet flushes and water drains"], "sample_ids": ["ylpYOorfH4o", "sfAvvZwdLCY"], "start_seconds": ["410", "20"], "properties": ["engine, run, loud", "water drains, flushes, water"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the 
fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a goat bleats as a person speaks", "a car accelerates and wind blows"], "sample_ids": ["tPJvjq9QePY", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["bleats, person, speak", "accelerates, wind, blows"], "captions_pred_video": ["a dog and a sheep in a barn", null], "captions_pred_audio": ["a baby cries and a man speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a man speaks as a machine runs"], "sample_ids": ["w0xsN8X18Y", "vD6lYD1l0BY"], "start_seconds": ["30", "330"], "properties": ["music, surface, rain", "a, machine, run"], "captions_pred_video": [null, "game controller being held in the hands of the person"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man is speaking and dishes are being washed "], "question": "which entity has a machine running?", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "small dogs yip and bark sharply"], "sample_ids": ["tiDFTC-5vU", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["male, duck, laugh", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is 
happening"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["waves crash against a shoreline and people speak", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["yFB25fqfU8I", "su6FAOcOA8c"], "start_seconds": ["300", "4"], "properties": ["wave, crash, shoreline", "engine, idle, woman"], "captions_pred_video": ["footage of a person surfing in the ocean", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sapQIQUhFc", "uYT5gxnyMWM"], "start_seconds": ["280", "50"], "properties": ["water, stream, trickles", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a young woman speaks over spraying and another person yells", "a clock ticktocks"], "sample_ids": ["uYT5gxnyMWM", "v-g-j2uTByM"], "start_seconds": ["50", "30"], "properties": ["person, spray, yell", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "birds chirp and objects are moved around"], 
"sample_ids": ["v5P-ThUCINM", "yPUYU6t3rwo"], "start_seconds": ["400", "370"], "properties": ["background, chirp, bird", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and birds are chirping", "insects buzz and a man speaks"], "question": "which entity has birds chirping in the background?", "label": 0}, {"captions": ["a goat bleats as a person speaks", "a stream of water runs briefly"], "sample_ids": ["tPJvjq9QePY", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["bleats, person, speak", "stream, water, run"], "captions_pred_video": ["a dog and a sheep in a barn", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a baby cries and a man speaks", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["sU53zg9Jp7s", "tDVADusiIoc"], "start_seconds": ["380", "60"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "water, radio, man"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman?", "label": 0}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["yYEVLuqEytU", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["grunt, slurp, background", 
"rustling, ducks, quack"], "captions_pred_video": ["a baby goat is being petted by a person's hand", null], "captions_pred_audio": ["several sheep bleat and a man speaks", "a duck quacks and a woman speaks"], "question": "which entity is about ducks?", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a woman speaks happily and an animal chirps"], "sample_ids": ["y2bVZ7rz-5M", "uWAAAL4CIoc"], "start_seconds": ["280", "0"], "properties": ["motor noise, horn, siren", "a woman, chirps, animal"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["electronic beeps occur in a short series", "people applaud and hoot and chat quietly"], "sample_ids": ["y682ml90jGw", "wwyfGO2J4"], "start_seconds": ["11", "90"], "properties": ["beeps, series, electronic", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a person is snoring while sleeping"], "sample_ids": ["w2M4i1mklOA", "vJrjSeP17yE"], "start_seconds": ["30", "40"], "properties": ["alarm, gears, turn", "a person is sleeping, snoring, person"], "captions_pred_video": ["footage of an antique clock", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a person snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["x9JovgqUcs", "uYT5gxnyMWM"], "start_seconds": ["500", "50"], 
"properties": ["a, man, speaks, keyboard", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking and typing on a keyboard?", "label": 0}, {"captions": ["a train horn blares as a train passes, then fades", "a infant makes noise and is excited"], "sample_ids": ["zVacuqSb4LI", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["blares, fades, train", "noise, excited, infant"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["people speak softly as food sizzles", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yhQ2Lg-7qDY", "vJ7JPEFhyLA"], "start_seconds": ["130", "16"], "properties": ["food, sizzle, speak", "three men, wind, flow"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a man in a red shirt paddling a 
kayak in the water"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a liquid flowing?", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wqN6IIHw3po", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["rain, surface, fall", "a woman, something, fried"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and water is splashing", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "a toilet flushes and a female speaks"], "sample_ids": ["slZLHwNbbt4", "yaln9y8I7ms"], "start_seconds": ["300", "230"], "properties": ["a, horn, run", "female, flushes, toilet"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage is blurry and out of focus"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a toilet flushes and a man speaks"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "water splashes as an animal walks through"], "sample_ids": ["viuTg1M-dqg", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["two men, speak, follow", "animal, water, splashes"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "water splashes and gurgles as people speak"], "question": "which entity is more active", 
"label": 1}, {"captions": ["some people speak", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vbZ-0lGPneg", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "loud, multiple, distance"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["wnpJndXuxLc", "vbZ-0lGPneg"], "start_seconds": ["50", "30"], "properties": ["beeps, loud, whistle", "a woman, a television program, a bird"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a series of light horn beeps followed by a loud steam whistle?", "label": 0}, {"captions": ["a woman and man are speaking", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vbpKkWvfOu4", "xKB8O8LTs6s"], "start_seconds": ["560", "70"], "properties": ["two people, speaking, woman, man", "music, gunfire, explosion"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "music plays while a woman speaks and gunshots are fired "], 
"question": "which entity has more action", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "a railroad crossing bell rings as a train horn blows"], "sample_ids": ["wudZTNBtVqc", "tZGN5a7ybxo"], "start_seconds": ["60", "60"], "properties": ["accelerates, engine, wind", "ring, train, horn"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "is taken from a moving vehicle on the train tracks"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a train is moving and blowing its horn "], "question": "which entity is a warning device", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a woman speaks and dog vocalizes"], "sample_ids": ["w8uLijTqtlU", "uWAAAL4CIoc"], "start_seconds": ["70", "0"], "properties": ["wind, microphone, noise", "a, dog, vocalize"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a person is whistling a tune"], "sample_ids": ["vfYTJq7nU", "scYRUkrFLiQ"], "start_seconds": ["130", "30"], "properties": ["rustling, ducks, quack", "a, tune, whistle"], "captions_pred_video": [null, "of the man wearing a bow tie and a suit jacket in front of a red door"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "several insects fly while two men talk"], "sample_ids": ["v5P-ThUCINM", "s-T9OVOiMLo"], "start_seconds": ["400", "330"], "properties": ["background, chirp, bird", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to 
reach the top of the tree"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more flying insects", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "people speak as gunfire rings out"], "sample_ids": ["sZPuqDgX2V0", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["engine, accelerate, intercom", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vfYTJq7nU", "xKB8O8LTs6s"], "start_seconds": ["130", "70"], "properties": ["ducks, quack, man", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a duck quacks and a woman speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["a door opens and birds chirp", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yeFvk9x0wWI", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["door, open, birds", "engine, laugh, loud"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a jet engine roars "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a toilet flushes and water drains", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sfAvvZwdLCY", "wDVMhEdTiVw"], 
"start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "gun, shoot, water"], "captions_pred_video": ["footage of the toilet in the bathroom", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a toilet is flushed", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "wind blows as people chatter quietly"], "sample_ids": ["xvDdE3zNf8Y", "xBxDz0CFVn0"], "start_seconds": ["120", "30"], "properties": ["A, crumple, paper", "wind, chatter, people"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["uYT5gxnyMWM", "vbZ-0lGPneg"], "start_seconds": ["50", "30"], "properties": ["a, scream, girl", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["sxYkFKFIZD0", "y2bVZ7rz-5M"], "start_seconds": ["20", "280"], "properties": ["screech, man, door", "motor noise, horn, siren"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 
1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is a warning", "label": 1}, {"captions": ["an infant crying as a woman laughs", "a vehicle accelerates and squeals tires"], "sample_ids": ["xhmRY9yhC7c", "yRx9txMcBl0"], "start_seconds": ["20", "40"], "properties": ["a, laugh, infant", "accelerates, tires, squeals"], "captions_pred_video": ["of a baby crying in a baby bouncer", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a baby cries and a woman speaks", "a car is revving its engine and skidding "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "pigeons vocalize and birds chirp"], "sample_ids": ["ujMt0-D-x2k", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["snoring, rhythmical, nearby", "vocalize, bird, chirp"], "captions_pred_video": ["of the dog playing with a toy on the floor", "of the pigeon in the cage"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 
0}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "an engine runs loudly"], "sample_ids": ["yZrFNS7GFBQ", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["pigeon, buzzes, insect", "loud, engine, run"], "captions_pred_video": ["of the bird in the cage", "footage is blurry because it's raining outside"], "captions_pred_audio": ["an owl hoots in the background ", "a lawn mower is running and men are speaking "], "question": "which entity is quieter", "label": 0}, {"captions": ["a large crowd cheers and applauds", "a motor vehicle roars, drowning out people speaking in the background"], "sample_ids": ["rqfQRErjfk8", "s4Uz1Ffgo04"], "start_seconds": ["170", "100"], "properties": ["crowd, cheers, applauds", "roars, background, people speaking"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["a helicopter engine runs", "a person snores loudly multiple times at a close distance"], "sample_ids": ["t5ZbXbniOWk", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["engine, helicopter, run", "loud, multiple, distance"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["zFjIWfSD-4", "y8WEcpOlT3I"], "start_seconds": ["410", "40"], "properties": ["People, motor, brakes", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man 
is speaking while a car is driving and a ticking sound is heard ", "a man is speaking with wind noise in the background "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "a machine beeps continuously"], "sample_ids": ["uPDn2BFTHk", "y682ml90jGw"], "start_seconds": ["140", "11"], "properties": ["woman, laughs, speaks", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "vehicles pass by on a roadway"], "sample_ids": ["t25U-v4k4ts", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["a, chirps, bird", "pass, vehicle, roadway"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a bird chirps in response to a woman chirping for the birds"], "sample_ids": ["zY3icUyMdh8", "uOpoD0gGXcs"], "start_seconds": ["20", "120"], "properties": ["dog, bark, engine", "chirps, woman, bird"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "a herd of cows grazing in the field"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "birds are chirping and a man is speaking"], "question": "which entity is a response to a human action", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "water splashes and a door squeaks"], "sample_ids": ["s4Uz1Ffgo04", "sdXV-ylviw"], "start_seconds": ["100", "190"], "properties": ["water, rushes, motorcycle", 
"sound, splash, door"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a dog barks and taps with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["xERFUeZONz8", "tDlysoZiA1I"], "start_seconds": ["0", "0"], "properties": ["ring, approach, traffic", "animal, grunts, chirps"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["an emergency vehicle siren blares", "birds are chirping and a rooster is crowing "], "question": "which entity is more animal like", "label": 1}, {"captions": ["an insect buzzes around continuously", "a motorcycle idles loudly as wind blows"], "sample_ids": ["v25l1jef3JY", "v7jJS8aAyA"], "start_seconds": ["0", "10"], "properties": ["buzzes, continuously, insect", "wind, blows, loudly"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a motorcycle engine is idling and vibrating"], "question": "which entity is louder", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "a man speaks as a car is passing by"], "sample_ids": ["se87d6yxEOA", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["run, whistle, pass", "a, car, pass"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a man is speaking with background noise and breathing sounds "], "question": "which object is moving", "label": 0}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "music plays, a person speaks, followed by whooshes and a ding"], "sample_ids": ["uYT5gxnyMWM", "tQWGZLItBXk"], "start_seconds": ["50", "170"], "properties": ["female, spraying, scream", "music, person, ding"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "worms revolution screenshots"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity has a person speaking?", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "someone is typing on a computer keyboard"], "sample_ids": ["uPDn2BFTHk", "v0x1odnXtP0"], "start_seconds": ["140", "210"], "properties": ["lady, laugh, baby", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["y2ZBGpgbhHM", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["dog, chirp, breathe", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet 
holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks, then dials a rotary telephone", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["tK4VlLsNxak", "uYT5gxnyMWM"], "start_seconds": ["120", "50"], "properties": ["a, dial, telephone", "a, scream, girl"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "wind blows as people chatter quietly"], "sample_ids": ["tPJvjq9QePY", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["animal, bleat, moo", "wind, chatter, people"], "captions_pred_video": ["a dog and a sheep in a barn", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby cries and a man speaks", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a goat bleats as a person speaks", "wind blows as people chatter quietly"], "sample_ids": ["tPJvjq9QePY", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["bleats, person, speak", "wind, chatter, people"], "captions_pred_video": ["a dog and a sheep in a barn", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby cries and a man speaks", "a man is 
speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["uqFtmnhuqA8", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["a, b, c", "a woman, something, fried"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "- a woman cooking in the kitchen"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a horse runs while two women talk", "a man speaks as horns blow"], "sample_ids": ["sdvI1mHAsc", "tHyNqRyK34A"], "start_seconds": ["20", "24"], "properties": ["two women, horse, run", "a, man, speaks"], "captions_pred_video": [null, "being taken from inside a vehicle on the street at night"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a man is speaking and a car is honking with background noise "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "paper is crumpling consistently"], "sample_ids": ["x9JovgqUcs", "v5cSxLaHADY"], "start_seconds": ["500", "0"], "properties": ["a, man, speaks, keyboard", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man speaks and types on a keyboard", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["some men converse over an engine running", "wind blows as people chatter quietly"], "sample_ids": ["sCiy7QS1U", "xBxDz0CFVn0"], "start_seconds": ["300", "30"], "properties": ["men, converse, engine", "wind, chatter, people"], "captions_pred_video": 
[null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["tapping occurs then a baby cries", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wIJK3-5y0kA", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["a, cry, baby", "loud, jet engine, roar"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a baby cries and a woman speaks", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["u7C-AEBQM", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["ticks, rhythmic, quiet", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a ticktock of a clock", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vbZ-0lGPneg", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["a woman, a television program, a bird", "People, motor, brakes"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "an airplane engine runs"], "sample_ids": 
["vZAw4apG0Es", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["background, tick, repeat", "engine, airplane, runs"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a clock is ticking and people are talking", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "water rushes by"], "sample_ids": ["u--KhUW8l1Y", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["horn, siren, life", "water, rushes, by"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["a toilet flushes and water drains", "dishes cling together then a man begins to speak"], "sample_ids": ["sfAvvZwdLCY", "sQGXqGcwOTc"], "start_seconds": ["20", "3"], "properties": ["water drains, flushes, water", "cling, speak, dishes"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a toilet is flushed", "mechanisms are operating and water is splashing "], "question": "which entity is about water", "label": 0}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["voJh2gJxXhA", "tiDFTC-5vU"], "start_seconds": ["50", "30"], "properties": ["music, frog, croak", "male, duck, laugh"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", null], "captions_pred_audio": ["music is playing and crickets are chirping ", "a man is speaking and ducks are quacking"], "question": "which entity is 
a person speaking to a duck?", "label": 1}, {"captions": ["birds coo incessantly", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["yZrFNS7GFBQ", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["coo, bird, incessant", "two men, woman, birds"], "captions_pred_video": ["of the bird in the cage", null], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a video of birds?", "label": 0}, {"captions": ["a man speaks as a machine runs", "a machine beeps continuously"], "sample_ids": ["vD6lYD1l0BY", "y682ml90jGw"], "start_seconds": ["330", "11"], "properties": ["a, machine, run", "beeps, machine, continuously"], "captions_pred_video": ["game controller being held in the hands of the person", null], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a beeping sound is being made "], "question": "which machine is beeping continuously?", "label": 0}, {"captions": ["a person is snoring while sleeping", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vJrjSeP17yE", "y8WEcpOlT3I"], "start_seconds": ["40", "40"], "properties": ["a person is sleeping, snoring, person", "harsh, wind, blows"], "captions_pred_video": ["a black background with a small plane flying in the sky", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking with wind noise in the background "], "question": "which entity is a person", "label": 0}, {"captions": ["several insects fly while two men talk", "two men speak as a buffeting wind blows"], "sample_ids": ["s-T9OVOiMLo", "y8WEcpOlT3I"], "start_seconds": ["330", "40"], "properties": ["several, fly, men", "wind, speak, buffeting"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a 
man is speaking while insects are buzzing in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is about a buffeting wind?", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "water flows and trickles"], "sample_ids": ["t97k0cejSQE", "tB7hWb9gTuQ"], "start_seconds": ["250", "30"], "properties": ["bird, chirp, insect", "water, flow, trickle"], "captions_pred_video": ["a bee on a purple thistle flower", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a mechanical buzzing getting louder"], "sample_ids": ["u--KhUW8l1Y", "sEprKHm8Sj8"], "start_seconds": ["0", "90"], "properties": ["horn, siren, life", "noise, loud, buzzing"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a race car accelerates and revs its engine "], "question": "which entity is a noise", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vZAw4apG0Es", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["background, clock, ticktocks", "wind, blow, vehicle"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a 
clock ticking in the background?", "label": 0}, {"captions": ["a heavy rain falls endlessly", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wP8ZKrlx3oA", "su6FAOcOA8c"], "start_seconds": ["40", "4"], "properties": ["heavy, rain, fall", "engine, idle, woman"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "small dogs growl, bark and yip."], "sample_ids": ["yRx9txMcBl0", "sShpyu2l4YQ"], "start_seconds": ["40", "0"], "properties": ["accelerates, tires, squeals", "growl, bark, yip"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "the puppies are playing with a toy"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a dog is barking and growling"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["an airplane engine runs", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["yVPZ2MNWpms", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["engine, airplane, runs", "music, gunfire, explosion"], "captions_pred_video": ["footage of an airport with planes 
parked on the tarmac", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a car is driving by on the road ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a movie?", "label": 1}, {"captions": ["a power tool runs and touches a surface", "a child speaks in closed space"], "sample_ids": ["zfvPRf3chY", "yW6FWLSLkx4"], "start_seconds": ["290", "40"], "properties": ["power tool, run, touch", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is not a power tool", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["vs65y4qmyBE", "vzxHnu-SFEw"], "start_seconds": ["340", "80"], "properties": ["engine, run, man", "two objects, woman, speak"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a machine?", "label": 0}, {"captions": ["popping and crackling repeats as men yell and laugh", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["rqu8iB22IY", "w34HjHr6gAY"], "start_seconds": ["5", "30"], "properties": ["sound, repeats, laugh", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "paper is crumpling consistently"], "sample_ids": ["uYT5gxnyMWM", "v5cSxLaHADY"], "start_seconds": ["50", "0"], "properties": ["a, scream, girl", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "paper is crumpled and crinkled"], "question": "which entity is a video of a girl speaking followed by a scream and more girls talking?", "label": 0}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "water splashes as an animal 
walks through"], "sample_ids": ["zY3icUyMdh8", "w1ir-sZ3Im8"], "start_seconds": ["20", "90"], "properties": ["dog, bark, engine", "animal, water, splashes"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "water splashes and gurgles as people speak"], "question": "which entity is a video of an animal walking through water?", "label": 1}, {"captions": ["a person is whistling a tune", "an insect buzzes around continuously"], "sample_ids": ["scYRUkrFLiQ", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["a, tune, whistle", "buzzes, continuously, insect"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a person whistling a song", "a fly is buzzing around a microphone "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a baby cries and a woman moans", "an infant crying frantically"], "sample_ids": ["smDKStoHBJo", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["a, cry, woman", "cry, infant, frantically"], "captions_pred_video": ["a man holding a crying baby in his arms", "of the baby crying in the car seat"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a baby cries loudly"], "question": "which entity is crying frantically", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a child and woman laughs and the woman speaks"], "sample_ids": ["sU53zg9Jp7s", "uPDn2BFTHk"], "start_seconds": ["380", "140"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "woman, laughs, speaks"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", null], "captions_pred_audio": 
["birds chirp and a doorbell rings with breathing and music in the background ", "a baby laughs and a woman speaks"], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a man speaks as a machine runs"], "sample_ids": ["sWZzXuWYY", "vD6lYD1l0BY"], "start_seconds": ["420", "330"], "properties": ["male, clanks, thumps", "a, machine, run"], "captions_pred_video": [null, "game controller being held in the hands of the person"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man is speaking and dishes are being washed "], "question": "which machine is running", "label": 1}, {"captions": ["a woman and man speak while food is frying", "people applaud and hoot and chat quietly"], "sample_ids": ["zk-xJGQU8-4", "wwyfGO2J4"], "start_seconds": ["130", "90"], "properties": ["food, man, woman", "people, applaud, hoot"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", null], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wvKpEYswXO0", "vJ7JPEFhyLA"], "start_seconds": ["150", "16"], "properties": ["water, tap, run", "three men, wind, flow"], "captions_pred_video": ["of the person preparing food in the kitchen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing?", "label": 0}, {"captions": ["an engine runs and wind blows", "water pouring and bubbling"], "sample_ids": ["vs65y4qmyBE", "uyRfq-jKPpo"], 
"start_seconds": ["340", "50"], "properties": ["engine, run, wind", "water, bubbles, pouring"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["ugHJF0hfYkg", "yDoT73BWsdA"], "start_seconds": ["10", "10"], "properties": ["engine, running, continuously", "engine, revs, vehicle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a helicopter is flying overhead ", "a race car accelerates and revs its engine "], "question": "which entity has a running engine", "label": 0}, {"captions": ["a toilet door squeaks as it is opened", "water splashes and a door squeaks"], "sample_ids": ["sdXV-ylviw", "sdXV-ylviw"], "start_seconds": ["190", "190"], "properties": ["door, toilet, squeaks", "sound, splash, door"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dog barks and taps with background noise ", "a dog barks and taps 
with background noise "], "question": "which door is squeaking", "label": 1}, {"captions": ["small dogs yip and bark sharply", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["v-wcQf4BDY0", "uYT5gxnyMWM"], "start_seconds": ["120", "50"], "properties": ["bark, yip, sharply", "a, scream, girl"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a dog barks and growls", "a woman is speaking and a baby is crying"], "question": "which entity is a human", "label": 1}, {"captions": ["a person is snoring while sleeping", "someone is snoring while sleeping"], "sample_ids": ["vJrjSeP17yE", "ujMt0-D-x2k"], "start_seconds": ["40", "0"], "properties": ["a person is sleeping, snoring, person", "snore, sleep, someone"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of the dog playing with a toy on the floor"], "captions_pred_audio": ["a person snoring loudly", "a person is snoring loudly"], "question": "which entity is snoring while sleeping", "label": 0}, {"captions": ["an audience gives applause as a man yells and a group sings", "waves crash against a shoreline and people speak"], "sample_ids": ["tdWhHV3X25Q", "yFB25fqfU8I"], "start_seconds": ["60", "300"], "properties": ["applause, audience, yells", "wave, crash, shoreline"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["un9VQlzgZM", "zFjIWfSD-4"], "start_seconds": ["5", "410"], "properties": ["females, talk, 
laugh", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a toilet flushes and water drains"], "sample_ids": ["v0x1odnXtP0", "sfAvvZwdLCY"], "start_seconds": ["210", "20"], "properties": ["keyboard, type, computer", "water drains, flushes, water"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a person is typing on a keyboard", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["water flows followed by women screaming", "a man speaks as a motor runs in the background"], "sample_ids": ["w5W5Kqtc8E", "xZepNM9qcRA"], "start_seconds": ["100", "30"], "properties": ["water, flow, women", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["people clap and speak in the distance", "an insect buzzes around continuously"], "sample_ids": ["wwyfGO2J4", "v25l1jef3JY"], "start_seconds": ["90", "0"], "properties": ["clap, distance, speak", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "water splashes as 
an animal walks through"], "sample_ids": ["uKCSGgof8gI", "w1ir-sZ3Im8"], "start_seconds": ["12", "90"], "properties": ["chirps, distance, signal", "animal, water, splashes"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a person is whistling a tune", "people cheer as a vehicle engine revs"], "sample_ids": ["scYRUkrFLiQ", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["a, tune, whistle", "engine revs, vehicle, people"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a person whistling a song", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["children cry and people talk", "a man speaks as a car is passing by"], "sample_ids": ["xLwHe825Zs", "sK4u5T8hW78"], "start_seconds": ["18", "30"], "properties": ["people talk, children cry, people talk", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a train horn blares as a train passes, 
then fades", "a car accelerates and wind blows"], "sample_ids": ["zVacuqSb4LI", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["blares, fades, train", "accelerates, wind, blows"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", null], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["someone is snoring while sleeping", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ujMt0-D-x2k", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["snore, sleep, someone", "female, spraying, scream"], "captions_pred_video": ["of the dog playing with a toy on the floor", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["x5cuQjOdM3E", "wyllXV6PjKo"], "start_seconds": ["30", "30"], "properties": ["cat, meows, young woman", "a baby, a woman, a man"], "captions_pred_video": ["a 
black background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman speaks and a baby cries"], "question": "which entity is a child", "label": 1}, {"captions": ["wind blows strongly", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["w8uLijTqtlU", "yajyRTUQk3U"], "start_seconds": ["70", "400"], "properties": ["wind, blows, strongly", "a woman, something, fried"], "captions_pred_video": ["footage is blurry and shaky", "- a woman cooking in the kitchen"], "captions_pred_audio": ["the wind is blowing strongly", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["an aircraft engine runs", "water pouring and bubbling"], "sample_ids": ["yLCORCnd35Q", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["engine, aircraft, runs", "water, bubbles, pouring"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a baby cries and a woman speaks", "females talk and laugh 
over gusting wind"], "sample_ids": ["tMbMDvT50j8", "un9VQlzgZM"], "start_seconds": ["12", "5"], "properties": ["a, cry, woman", "females, talk, laugh"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is a group of people", "label": 1}, {"captions": ["birds coo incessantly", "a man speaks as a car is passing by"], "sample_ids": ["yZrFNS7GFBQ", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["coo, bird, incessant", "a, car, pass"], "captions_pred_video": ["of the bird in the cage", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a man speaks followed by another man speaking outside"], "sample_ids": ["wRBHTgrbiwg", "viuTg1M-dqg"], "start_seconds": ["50", "30"], "properties": ["bird, owl, speak", "two men, speak, follow"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before 
women yell", "wind blowing followed by a zoom"], "sample_ids": ["w5W5Kqtc8E", "vr8ZXjEBhMQ"], "start_seconds": ["100", "150"], "properties": ["wind, blow, vehicle", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "wind blows and a chainsaw cuts through wood "], "question": "which entity has a zoom?", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["ul60S8TXDA8", "su6FAOcOA8c"], "start_seconds": ["60", "4"], "properties": ["sound, distance, bell", "engine, idle, woman"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "waves crash against a shoreline and people speak"], "sample_ids": ["tDlysoZiA1I", "yFB25fqfU8I"], "start_seconds": ["0", "300"], "properties": ["animal, grunt, multiple", "wave, crash, shoreline"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of a person surfing in the ocean"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "an airplane engine spools and people speak"], "sample_ids": ["uYT5gxnyMWM", "wTjoRj1se3U"], "start_seconds": ["50", "390"], "properties": ["person, spray, yell", "airplane, engine, spool"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", 
"footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a jet engine is running and people are talking"], "question": "which entity is a video of a person speaking over spraying and another person yelling?", "label": 0}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["uqFtmnhuqA8", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["a, b, c", "loud, laughter, intermittent"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more like a scream", "label": 1}, {"captions": ["wind noise makes sound into a microphone", "a duck quacks continuously"], "sample_ids": ["w8uLijTqtlU", "vh30P49Po6s"], "start_seconds": ["70", "30"], "properties": ["wind, microphone, noise", "quacks, continuously, duck"], "captions_pred_video": ["footage is blurry and shaky", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["the wind is blowing strongly", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a man laughs and speaks as cats purr and hiss"], "sample_ids": ["x5cuQjOdM3E", "vVhthZ45k3Y"], "start_seconds": ["30", "30"], "properties": ["cat, meows, young woman", "cat, purr, hiss"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage is blurry and out of focus"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking and a cat is meowing"], "question": "which entity is more playful", "label": 1}, {"captions": ["wind blows and 
people scream while an engine revs", "an engine runs loudly"], "sample_ids": ["w5W5Kqtc8E", "vqZuVbG6-HI"], "start_seconds": ["100", "130"], "properties": ["wind, engine, scream", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["w5W5Kqtc8E", "ziUT9IFTkjg"], "start_seconds": ["100", "10"], "properties": ["water, splashes, motorboat", "background, birds, rustling"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone snores nearby", "a man speaks as birds chirp and a vehicle passes nearby"], "sample_ids": ["spJCm8tD9Zo", "siJFXfGWgDk"], "start_seconds": ["90", "50"], "properties": ["someone snores, nearby, someone", "a, bird, vehicle"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and birds are chirping in the background "], "question": "which entity is more likely to be a person", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "an insect buzzes around continuously"], "sample_ids": ["vf9xf3vMsGM", "v25l1jef3JY"], "start_seconds": ["540", "0"], "properties": ["A man speaks while turning a water faucet on.", "buzzes, continuously, insect"], "captions_pred_video": ["of the person washing their hands under the faucet", "a black background with a cartoon character in the foreground"], 
"captions_pred_audio": ["a man is speaking while water is running in the background", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["food is frying while a woman speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["yhQ2Lg-7qDY", "xjhAnI2q6hM"], "start_seconds": ["130", "6"], "properties": ["food, woman, speak", "engine revs, vehicle, people"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a truck is revving its engine and a man is speaking "], "question": "what is the woman doing while the food is frying?", "label": 0}, {"captions": ["water is sprayed across a hard surface", "a man is snoring loudly and repeatedly"], "sample_ids": ["sQwlkXjQabo", "sncRqQ67iJU"], "start_seconds": ["10", "460"], "properties": ["water, spray, surface", "loud, repeatedly, man"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["spraying followed by silence", "a person is snoring"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "wind blowing followed by a zoom"], "sample_ids": ["yajyRTUQk3U", "vr8ZXjEBhMQ"], "start_seconds": ["400", "150"], "properties": ["noise, woman, speak", "wind, blow, zoom"], "captions_pred_video": ["- a woman cooking in the kitchen", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "food is frying while a woman speaks"], "sample_ids": ["wz7N8YRy74I", 
"yhQ2Lg-7qDY"], "start_seconds": ["30", "130"], "properties": ["rooster, crow, background, people", "food, woman, speak"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a faucet is running and a man is speaking"], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "a dog whimpers as someone inhales/exhales briefly"], "sample_ids": ["uiS58TNyUiw", "vmrxwuAMb2I"], "start_seconds": ["430", "40"], "properties": ["vocalize, bird, chirp", "a dog, inhales, exhales"], "captions_pred_video": ["of the pigeon in the cage", "of the dog laying on the bed with his head out of the blanket"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a dog barks and growls"], "question": "which animal is not vocalizing", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "a propeller rotates loudly and intensely"], "sample_ids": ["y4tPJXBKDig", "ugHJF0hfYkg"], "start_seconds": ["20", "10"], "properties": ["a, noise, talk", "loud, intense, propeller"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a helicopter is flying overhead "], "question": "which noise is louder", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wqUmIEzuNz4", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["frog, bird, vocalize", "music, gunfire, explosion"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a cat meows 
and rustles", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["x6ijhqRY38s", "xKB8O8LTs6s"], "start_seconds": ["250", "70"], "properties": ["bowl, silverware, man", "music, gunfire, explosion"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zl9Dqx-j7q4", "yajyRTUQk3U"], "start_seconds": ["6", "400"], "properties": ["motors rev, laugh, loudly", "a woman, something, fried"], "captions_pred_video": ["footage of a man driving a car in the dark", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a jet engine roars ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a stream of water runs briefly"], "sample_ids": ["sSMl2vc3ek", "x-PeY8Yb8M4"], "start_seconds": ["20", "300"], "properties": ["a person, laughs, snores", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a person snoring loudly", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["some men converse over an engine running", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["sCiy7QS1U", "w5W5Kqtc8E"], "start_seconds": 
["300", "100"], "properties": ["men, converse, engine", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vW4x7S1VfQc", "wz7N8YRy74I"], "start_seconds": ["150", "30"], "properties": ["clacking, oil, woman", "rooster, crow, background, men"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["food sizzles in a frying pan", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "someone is typing on a computer keyboard"], "sample_ids": ["sjlVMgdGSK0", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["accelerates, vehicle, race car", "keyboard, type, computer"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "how to make money on youtube in spanish"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a person is typing on a keyboard"], "question": "which is a stationary object", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "people speak as gunfire rings out"], "sample_ids": ["yeFvk9x0wWI", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["clack, bird, chirp", "gunfire, ring, speak"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["birds chirp in the background as a car 
drives by ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "an insect buzzes around continuously"], "sample_ids": ["x6ijhqRY38s", "v25l1jef3JY"], "start_seconds": ["250", "0"], "properties": ["something metal, glass, hit", "buzzes, continuously, insect"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a train horn sounds and railroad crossing ring", "children cheer as a man speaks then an audience screams"], "sample_ids": ["s7knHCFW82w", "vJvryTwuAV8"], "start_seconds": ["30", "16"], "properties": ["horn, sound, train", "audience, cheer, man"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a man is speaking and a crowd is shouting and whooping "], "question": "which entity is a person speaking to an audience?", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["zl9Dqx-j7q4", "zj2R0XoFr5k"], "start_seconds": ["6", "50"], "properties": ["motors rev, laugh, loudly", "airplane, boy, fly"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a jet engine roars ", "a woman speaks while a helicopter flies overhead "], "question": 
"which entity is flying", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["zj2R0XoFr5k", "xjvTpk2Zpr8"], "start_seconds": ["50", "70"], "properties": ["airplane, fly, overhead", "wind, blows, vehicle"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a jet engine roars and wind blows "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "water is sprayed across a hard surface"], "sample_ids": ["y1saVTXsKwc", "sQwlkXjQabo"], "start_seconds": ["80", "10"], "properties": ["a, dog, talk", "water, spray, surface"], "captions_pred_video": ["a dog playing with a pink ball", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a dog barks and a man speaks", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vmrxwuAMb2I", "wz7N8YRy74I"], "start_seconds": ["40", "30"], "properties": ["a dog, inhales, exhales", "rooster, crow, background, men"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a dog barks and growls", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "an airplane engine roars increasingly louder"], "sample_ids": ["sjlVMgdGSK0", "vBslzh7saPw"], "start_seconds": ["30", "90"], "properties": ["accelerates, vehicle, race 
car", "engine, roar, louder"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a jet engine roars and accelerates "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a toilet flushes and water drains unevenly"], "sample_ids": ["yYEVLuqEytU", "vhJWZheqaE"], "start_seconds": ["40", "0"], "properties": ["grunt, slurp, background", "water drains unevenly, toilet flushes, water drains"], "captions_pred_video": ["a baby goat is being petted by a person's hand", null], "captions_pred_audio": ["several sheep bleat and a man speaks", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "people speaking indiscriminately in the distance with a person snoring loudly nearby"], "sample_ids": ["sAam2NqGhLY", "w2JXXIAdUdg"], "start_seconds": ["20", "10"], "properties": ["snoring, breathing, child", "snoring, distance, person"], "captions_pred_video": ["of a little girl sleeping on a couch", "a close up shot of a person's mouth with a toothbrush in it"], "captions_pred_audio": ["a person is snoring", "a person snoring and a dog whimpering"], "question": "which entity is a person", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "a telephone rings followed by a woman talking"], "sample_ids": ["x9JovgqUcs", "tGcFnX0GHI"], "start_seconds": ["500", "0"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a keyboard", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["someone is snoring while 
sleeping", "a woman speaks as she rubs two objects together"], "sample_ids": ["ujMt0-D-x2k", "vzxHnu-SFEw"], "start_seconds": ["0", "80"], "properties": ["snore, sleep, someone", "two objects, woman, speak"], "captions_pred_video": ["of the dog playing with a toy on the floor", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "people speak as gunfire rings out"], "sample_ids": ["uJV8NDaHqqk", "wqTCwqVRDlk"], "start_seconds": ["100", "80"], "properties": ["loud, fly, chirp", "gunfire, ring, speak"], "captions_pred_video": ["a bee hive in a wooden box", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking and a gun is fired"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "a telephone rings followed by a woman talking"], "sample_ids": ["y8WEcpOlT3I", 
"tGcFnX0GHI"], "start_seconds": ["40", "0"], "properties": ["harsh, wind, blows", "ring, talk, woman"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a drill drills through something then people begin laughing"], "sample_ids": ["ylpYOorfH4o", "tEE3MpBt1sg"], "start_seconds": ["410", "50"], "properties": ["engine, run, loud", "drill, something, laugh"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a man is speaking and an engine is revving", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["a infant makes noise and is excited", "water splashes as an animal walks through"], "sample_ids": ["wIJK3-5y0kA", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["noise, excited, infant", "animal, water, splashes"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a baby cries and a woman speaks", "water splashes and gurgles 
as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "winds blows roughly as a vehicle races past"], "sample_ids": ["xyL9F5VrjkE", "xjvTpk2Zpr8"], "start_seconds": ["20", "70"], "properties": ["wind, blows, vehicle", "wind, blows, vehicle"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a jet engine roars and wind blows "], "question": "which entity shows a vehicle running?", "label": 0}, {"captions": ["an infant crying as a woman laughs", "three men talk while wind blows and some liquid flows"], "sample_ids": ["xhmRY9yhC7c", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["a, laugh, infant", "three men, wind, flow"], "captions_pred_video": ["of a baby crying in a baby bouncer", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman and an infant?", "label": 0}, {"captions": ["food is frying while a woman speaks", "an airplane engine runs"], "sample_ids": ["yhQ2Lg-7qDY", "yVPZ2MNWpms"], "start_seconds": ["130", "0"], "properties": ["food, woman, speak", "engine, airplane, runs"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "some tunes played by whistling"], "sample_ids": ["sZvwOuuPGP0", "u6BnG6YZqJ4"], "start_seconds": ["50", "0"], "properties": ["engine, diesel, truck", "tune, play, whistling"], "captions_pred_video": ["of a 
bulldozer clearing a road in a forest stock footage and royalty-free videos", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a medium engine is running ", "a person whistling a song"], "question": "which entity is not a vehicle", "label": 1}, {"captions": ["an aircraft engine runs", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yLCORCnd35Q", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["engine, aircraft, runs", "a woman, laughs, animal"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "waves crash against a shoreline and people speak"], "sample_ids": ["uqFtmnhuqA8", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["a, b, c", "wave, crash, shoreline"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "footage of a person surfing in the ocean"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more like a natural phenomenon", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["xvDdE3zNf8Y", "wqZ135Ssz0"], "start_seconds": ["120", "60"], "properties": ["A, crumple, paper", "two men, woman, birds"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", null], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking and ducks are quacking with wind noise in the background "], "question": 
"which entity has more people", "label": 1}, {"captions": ["footsteps followed by a flushing toilet", "a man speaks while water drains"], "sample_ids": ["yXrw3GRMZag", "vSeGhaZt-aI"], "start_seconds": ["40", "50"], "properties": ["sound, toilet, flush", "water, drain, man"], "captions_pred_video": ["footage of a toilet bowl with water in it", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["rustling followed by a toilet flushing", "a man is speaking and pouring liquid with background noise "], "question": "which entity has a man speaking while water drains?", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "an electric engine works nearby followed by a child talking"], "sample_ids": ["wTjoRj1se3U", "xSKJGCItUWE"], "start_seconds": ["390", "10"], "properties": ["engine, run, people", "engine, work, child"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a jet engine is running and people are talking", "a high pitched engine is running and a child speaks"], "question": "which entity has a child talking?", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vhJWZheqaE", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["water drains unevenly, toilet flushes, water drains", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a train horn blows as it passes by"], "sample_ids": ["xV7Mg1QucSc", "zVacuqSb4LI"], "start_seconds": ["14", "30"], "properties": ["alarm, ticktocks, laughs", 
"horn, blows, train"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "an insect buzzes around continuously"], "sample_ids": ["ugHJF0hfYkg", "v25l1jef3JY"], "start_seconds": ["10", "0"], "properties": ["engine, running, continuously", "buzzes, continuously, insect"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a helicopter is flying overhead ", "a fly is buzzing around a microphone "], "question": "which entity is not a helicopter?", "label": 1}, {"captions": ["a child speaks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yW6FWLSLkx4", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["a, child, speaks", "a woman, laughs, animal"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": 
["a woman is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["wSVhSdj0F0", "xfaoyyzw2WU"], "start_seconds": ["10", "180"], "properties": ["horn honks, keys jingle, electronic beep", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a propeller rotates loudly and intensely"], "sample_ids": ["zl9Dqx-j7q4", "ugHJF0hfYkg"], "start_seconds": ["6", "10"], "properties": ["motors rev, laugh, loudly", "loud, intense, propeller"], "captions_pred_video": ["footage of a man driving a car in the dark", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a jet engine roars ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "frogs croak and vocalize"], "sample_ids": ["xl2PIWyXaM", "yswmmRZFItk"], "start_seconds": ["160", "0"], "properties": ["chirp, man, younger person", "croak, vocalize, frog"], "captions_pred_video": [null, "a close up of a frog in the water"], "captions_pred_audio": ["birds are chirping and people are talking", "a frog is croaking"], "question": "which animal is more vocal", "label": 1}, {"captions": ["some people speak", "continuous snoring"], "sample_ids": ["vbZ-0lGPneg", "sLkeqCDJIyw"], "start_seconds": ["30", "120"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "loud, snoring, noise"], "captions_pred_video": ["of a man 
holding a baby duck in his hands", ", what is the man doing on the couch? sleeping"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a person is snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a small engine idles continuously", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["y5WII6cTH7k", "xKB8O8LTs6s"], "start_seconds": ["40", "70"], "properties": ["engine, idle, continuously", "music, gunfire, explosion"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["an engine is knocking and vibrating ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "a man speaks as a car is passing by"], "sample_ids": ["yDoT73BWsdA", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["engine, revs, vehicle", "a, car, pass"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which vehicle is passing by", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["wSVhSdj0F0", "w34HjHr6gAY"], "start_seconds": 
["10", "30"], "properties": ["horn honks, keys jingle, slam", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "frogs croak and vocalize"], "sample_ids": ["uWAAAL4CIoc", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["a woman, chirps, animal", "croak, vocalize, frog"], "captions_pred_video": [null, "a close up of a frog in the water"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a frog is croaking"], "question": "which animal is speaking", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xjvTpk2Zpr8", "yajyRTUQk3U"], "start_seconds": ["70", "400"], "properties": ["wind, blows, vehicle", "a woman, something, fried"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": 
["yRx9txMcBl0", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["motors, tires, screech", "clickety-clack, train, whistle"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a stream of water runs briefly"], "sample_ids": ["y2bVZ7rz-5M", "x-PeY8Yb8M4"], "start_seconds": ["280", "300"], "properties": ["engine, horn, siren", "stream, water, run"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a car is driving on a wet road "], "question": "which entity is a moving object", "label": 0}, {"captions": ["food is frying while a woman speaks", "an airplane engine spools and people speak"], "sample_ids": ["yhQ2Lg-7qDY", "wTjoRj1se3U"], "start_seconds": ["130", "390"], "properties": ["food, woman, speak", "airplane, engine, spool"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a faucet is running and a man 
is speaking", "a jet engine is running and people are talking"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["vehicles pass by on a roadway", "paper folding and crinkling"], "sample_ids": ["tgbONvsP47Y", "zPpG3RD8lSs"], "start_seconds": ["0", "20"], "properties": ["pass, vehicle, roadway", "paper, fold, crinkle"], "captions_pred_video": ["footage of a fire truck entering a garage", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a car is driving on the road ", "the wind blows and a mouse clicks "], "question": "which is not a vehicle", "label": 1}, {"captions": ["a toilet flushes and water drains", "a woman speaks as she rubs two objects together"], "sample_ids": ["sfAvvZwdLCY", "vzxHnu-SFEw"], "start_seconds": ["20", "80"], "properties": ["water drains, flushes, water", "two objects, woman, speak"], "captions_pred_video": ["footage of the toilet in the bathroom", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with 
scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a person is burping while a girl speaks"], "sample_ids": ["wqZ135Ssz0", "vdoxuJn9lTc"], "start_seconds": ["60", "40"], "properties": ["man, woman, squawks", "person, burp, girl"], "captions_pred_video": [null, "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a child speaks followed by a burp"], "question": "which entity has a girl speaking?", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["uoGVs9yUqY4", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["multiple, vocalize, wind", "a woman, something, fried"], "captions_pred_video": ["for how to make a wooden shed door youtube", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, 
{"captions": ["the wind blows while a vehicle engine runs", "a man laughs and speaks as cats purr and hiss"], "sample_ids": ["xyL9F5VrjkE", "vVhthZ45k3Y"], "start_seconds": ["20", "30"], "properties": ["wind, blows, vehicle", "cat, purr, hiss"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage is blurry and out of focus"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking and a cat is meowing"], "question": "which entity is more likely to be a cat", "label": 1}, {"captions": ["water flows followed by women screaming", "a man speaks as a car is passing by"], "sample_ids": ["w5W5Kqtc8E", "sK4u5T8hW78"], "start_seconds": ["100", "30"], "properties": ["water, flow, women", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "people applaud and hoot and chat quietly"], "sample_ids": ["u5RmF3c3Aw", "wwyfGO2J4"], "start_seconds": ["60", "90"], "properties": ["engine, car, zoom", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "some 
men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vh30P49Po6s", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["loud, continuous, quacks", "men, talk, cars"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["ugHJF0hfYkg", "xfaoyyzw2WU"], "start_seconds": ["10", "180"], "properties": ["loud, propeller, move", "loud, jet engine, roar"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a helicopter is flying overhead ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "an insect buzzes around continuously"], "sample_ids": ["t8CV69hcvF0", "v25l1jef3JY"], "start_seconds": ["210", "0"], "properties": ["person, sneeze, follow", "buzzes, continuously, insect"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a woman sneezes and 
speaks", "a fly is buzzing around a microphone "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vf44CgrjT0A", "tdWhHV3X25Q"], "start_seconds": ["20", "60"], "properties": ["loud, long, person", "applause, audience, yells"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a loud burp", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "multiple people speak and children yell while water gurgles"], "sample_ids": ["xSKJGCItUWE", "vb1fPSDI4c"], "start_seconds": ["10", "30"], "properties": ["engine, work, child", "multiple, people, yell"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking?", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["tDVADusiIoc", "xfaoyyzw2WU"], "start_seconds": ["60", "180"], "properties": ["water, radio, man", "loud, jet engine, roar"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vKrYfzleLB8", 
"xfaoyyzw2WU"], "start_seconds": ["110", "180"], "properties": ["a, ring, gunshots", "loud, jet engine, roar"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "vehicles pass by on a roadway"], "sample_ids": ["s6DESzUTGjY", "tgbONvsP47Y"], "start_seconds": ["16", "0"], "properties": ["wind, laugh, woman", "pass, vehicle, roadway"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a car is driving on the road "], "question": "which entity is more likely to be seen in a movie", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vzxHnu-SFEw", "zj2R0XoFr5k"], "start_seconds": ["80", "50"], "properties": ["two objects, woman, speak", 
"airplane, boy, fly"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which object is moving", "label": 1}, {"captions": ["a child speaks", "an airplane engine runs"], "sample_ids": ["yW6FWLSLkx4", "yVPZ2MNWpms"], "start_seconds": ["40", "0"], "properties": ["a, child, speaks", "engine, airplane, runs"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a car is driving by on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "birds chirp and objects are moved around"], "sample_ids": ["sTpirNYo8vQ", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["a, tone, fast", "birds chirp, objects are 
moved around, birds"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "someone is typing on a computer keyboard"], "sample_ids": ["tOj4tdLRaA", "v0x1odnXtP0"], "start_seconds": ["70", "210"], "properties": ["woman, laugh, baby", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "water flows and trickles"], "sample_ids": ["uzQnlJXBbOM", "tB7hWb9gTuQ"], "start_seconds": ["50", "30"], "properties": ["ringing, beep, stop", "water, flow, trickle"], "captions_pred_video": ["footage of a person using a cell phone on a table", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a telephone rings and a man speaks", "water is splashing and gurgling"], "question": "which entity is a continuous flow", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["rqu8iB22IY", "t97k0cejSQE"], "start_seconds": ["5", "250"], "properties": ["sound, repeats, laugh", "sound, chirp, buzz"], "captions_pred_video": [null, "a bee on a purple thistle flower"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a bee buzzes and a woman speaks"], "question": "which entity has a higher pitch", "label": 1}, {"captions": ["a machine runs continuously", "a person snores loudly multiple times at a close distance"], 
"sample_ids": ["wdXV3Pv0jiY", "sSMl2vc3ek"], "start_seconds": ["11", "20"], "properties": ["machine, running, continuously", "loud, multiple, distance"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a person snoring loudly"], "question": "which entity is not a machine?", "label": 1}, {"captions": ["an animal quacks rapidly", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vh30P49Po6s", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["animal, quacks, rapidly", "stream, water, flow"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage is blurry and out of focus"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "people applaud and hoot and chat quietly"], "sample_ids": ["sLUnaPT5gM8", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["loud, laughter, intermittent", "people, applaud, hoot"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "a car speeding up in the distance"], "sample_ids": ["w34HjHr6gAY", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["beeps, squawk, child speaking", "distance, car, speed"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the 
wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "an infant crying frantically"], "sample_ids": ["vdoxuJn9lTc", "zwOBqeFTgiU"], "start_seconds": ["40", "30"], "properties": ["burp, loud, girl", "cry, infant, frantically"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "of the baby crying in the car seat"], "captions_pred_audio": ["a child speaks followed by a burp", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a man speaks as a car is passing by", "wind blows and women speak as livestock vocalizes"], "sample_ids": ["sK4u5T8hW78", "vXlk0lIQBFo"], "start_seconds": ["30", "470"], "properties": ["a, car, pass", "wind, speak, vocalize"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "- a woman and two donkeys in a fenced in area"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "wind chimes are ringing and people are speaking and laughing "], "question": "which entity is about a man speaking as a car is passing by?", "label": 0}, {"captions": ["a man speaks then rubs two hard objects 
together", "paper folding and crinkling"], "sample_ids": ["yJ0TePmaOo", "zPpG3RD8lSs"], "start_seconds": ["390", "20"], "properties": ["two hard objects, man, speak", "paper, fold, crinkle"], "captions_pred_video": [null, "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "the wind blows and a mouse clicks "], "question": "which object is softer", "label": 1}, {"captions": ["tapping occurs then a baby cries", "a man is snoring loudly and repeatedly"], "sample_ids": ["wIJK3-5y0kA", "sncRqQ67iJU"], "start_seconds": ["30", "460"], "properties": ["a, cry, baby", "loud, repeatedly, man"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a baby cries and a woman speaks", "a person is snoring"], "question": "which entity is louder", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "wind blows as people chatter quietly"], "sample_ids": ["zCrAfDfv6-A", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["person, mouse, click", "wind, chatter, people"], "captions_pred_video": ["shows a man with 
glasses and a green shirt sitting on a couch in a living room", "footage is blurry and out of focus"], "captions_pred_audio": ["a person whistles a song", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["water splashes as an animal walks through", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["w1ir-sZ3Im8", "w5W5Kqtc8E"], "start_seconds": ["90", "100"], "properties": ["animal, water, splashes", "wind, blow, vehicle"], "captions_pred_video": ["footage of a group of people riding horses through a river", null], "captions_pred_audio": ["water splashes and gurgles as people speak", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["continuous snoring", "a train horn blows as it passes by"], "sample_ids": ["sLkeqCDJIyw", "zVacuqSb4LI"], "start_seconds": ["120", "30"], "properties": ["loud, snoring, noise", "horn, blows, train"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a person is snoring loudly", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which noise is louder", "label": 0}, {"captions": ["a girl speaks followed by a scream and more girls talking", "someone whistles a tune"], "sample_ids": ["uYT5gxnyMWM", "sIXTftIuUgw"], "start_seconds": ["50", "90"], "properties": ["a, scream, girl", "someone, tune, whistle"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a motor runs in the distance as a soft wind periodically gusts"], "sample_ids": ["ugHJF0hfYkg", "xyL9F5VrjkE"], "start_seconds": ["10", "20"], "properties": ["engine, running, continuously", "wind, motor, distance"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of a caterpillar truck loading logs into a trailer"], "captions_pred_audio": ["a helicopter is flying overhead ", "the wind is blowing and a car is passing by "], "question": "which entity is running continuously", 
"label": 0}, {"captions": ["a girl speaks followed by a scream and more girls talking", "wind blows and women speak as livestock vocalizes"], "sample_ids": ["uYT5gxnyMWM", "vXlk0lIQBFo"], "start_seconds": ["50", "470"], "properties": ["a, scream, girl", "wind, speak, vocalize"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "- a woman and two donkeys in a fenced in area"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "wind chimes are ringing and people are speaking and laughing "], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["tapping occurs then a baby cries", "some men converse over an engine running"], "sample_ids": ["wIJK3-5y0kA", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["a, cry, baby", "men, converse, engine"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a baby crying?", "label": 0}, {"captions": ["a toilet flushes and water drains", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sfAvvZwdLCY", "tiDFTC-5vU"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "male, duck, laugh"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "water pouring and bubbling"], "sample_ids": ["t8CV69hcvF0", "uyRfq-jKPpo"], "start_seconds": ["210", "50"], "properties": ["person, sneeze, follow", "water, bubbles, pouring"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube 
how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman sneezes and speaks", "water is running from a faucet"], "question": "which entity is more likely to be in a bathroom", "label": 1}, {"captions": ["a motorcycle engine works nearby", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tOSWIURC-4", "vfYTJq7nU"], "start_seconds": ["0", "130"], "properties": ["engine, work, nearby", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a lawn mower is running ", "a duck quacks and a woman speaks"], "question": "which entity is a natural event", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vmrxwuAMb2I", "wqZ135Ssz0"], "start_seconds": ["40", "60"], "properties": ["a dog, inhales, exhales", "two men, woman, birds"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", null], "captions_pred_audio": ["a dog barks and growls", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "people applaud and hoot and 
chat quietly"], "sample_ids": ["w2JXXIAdUdg", "wwyfGO2J4"], "start_seconds": ["10", "90"], "properties": ["snoring, distance, person", "people, applaud, hoot"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", null], "captions_pred_audio": ["a person snoring and a dog whimpering", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "small dogs yip and bark sharply"], "sample_ids": ["vBslzh7saPw", "v-wcQf4BDY0"], "start_seconds": ["90", "120"], "properties": ["power, scream, increase", "bark, yip, sharply"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["wRBHTgrbiwg", "w5W5Kqtc8E"], "start_seconds": ["50", "100"], "properties": ["bird, owl, speak", "wind, blow, vehicle"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "an engine runs loudly"], "sample_ids": ["vZAw4apG0Es", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["background, tick, repeat", "loud, engine, run"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a clock is ticking and people are 
talking", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["ukxt9I7eMMg", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["continuous, woman, speaking", "music, gunfire, explosion"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more action", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a man speaks as a motor runs in the background"], "sample_ids": ["yPUYU6t3rwo", "xZepNM9qcRA"], "start_seconds": ["370", "30"], "properties": ["birds chirp, objects are moved around, birds", "background, motor, run"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["insects buzz and a man speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "some men converse over an engine running"], "sample_ids": ["zALy31PjDl0", "sCiy7QS1U"], "start_seconds": ["21", "300"], "properties": ["a man, a vehicle, a horn", "men, converse, engine"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", null], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has more people", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "some men 
converse over an engine running"], "sample_ids": ["zsLxS-uLJTw", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["horn, blast, train", "men, converse, engine"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", null], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a train", "label": 0}, {"captions": ["water splashes and a motorboat passes as people yell", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["w5W5Kqtc8E", "xKB8O8LTs6s"], "start_seconds": ["100", "70"], "properties": ["water, splashes, motorboat", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "someone is typing on a computer keyboard"], "sample_ids": ["yDoT73BWsdA", "v0x1odnXtP0"], "start_seconds": ["10", "210"], "properties": ["engine, revs, vehicle", "keyboard, type, computer"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "how to make money on youtube in spanish"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a person is typing on a keyboard"], "question": "which is not a vehicle", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "pigeons vocalize and birds chirp"], "sample_ids": ["vW4x7S1VfQc", "uiS58TNyUiw"], "start_seconds": ["150", "430"], "properties": ["clacking, oil, woman", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "of the pigeon in the cage"], 
"captions_pred_audio": ["food sizzles in a frying pan", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["continuous sneezing together with speech", "a car accelerates and wind blows"], "sample_ids": ["x4dZyf9Gbj0", "u0TrcHhkPQ"], "start_seconds": ["130", "20"], "properties": ["continuous, sneeze, speech", "accelerates, wind, blows"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a woman sneezes and speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "birds chirp and objects are moved around"], "sample_ids": ["t25U-v4k4ts", "yPUYU6t3rwo"], "start_seconds": ["40", "370"], "properties": ["a, chirps, bird", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "insects buzz and a man speaks"], "question": "which entity has more birds", "label": 1}, {"captions": ["water flows followed by women screaming", "a child speaks in closed space"], "sample_ids": ["w5W5Kqtc8E", "yW6FWLSLkx4"], "start_seconds": ["100", "40"], "properties": ["water, flow, women", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["bees buzz and wind blows", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tMJne1a4AFI", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["bees buzz, wind blows, bees", "applause, audience, 
yells"], "captions_pred_video": ["a swarm of bees on the ground", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a person speaks briefly", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["zOZleIRqZm4", "zFjIWfSD-4"], "start_seconds": ["80", "410"], "properties": ["person, talk, brief", "People, motor, brakes"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a person talking?", "label": 0}, {"captions": ["a small engine idles continuously", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["y5WII6cTH7k", "su6FAOcOA8c"], "start_seconds": ["40", "4"], "properties": ["engine, idle, continuously", "engine, idle, woman"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a dark barks and whimpers", "a horn honks followed by a loud continuous buzzing while men speak"], "sample_ids": ["sYj4hpDUZDQ", "wsHBIgzs9Fs"], "start_seconds": ["30", "50"], "properties": ["barks, whimpers, dark", "horn, continuous, buzzing"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "shows a motorcycle riding down a country road with a motorcycle in the foreground"], "captions_pred_audio": ["a dog barks and a cat meows", "a car accelerates and revs its engine while a man 
speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "a toilet flushes and a female speaks"], "sample_ids": ["sapQIQUhFc", "yaln9y8I7ms"], "start_seconds": ["280", "230"], "properties": ["water, trickles, flow", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["w5W5Kqtc8E", "yDoT73BWsdA"], "start_seconds": ["100", "10"], "properties": ["wind, engine, scream", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a clock ticktocks briefly", "a race car approaches quickly and slows down squealing tires"], "sample_ids": ["u7C-AEBQM", "sEprKHm8Sj8"], "start_seconds": ["30", "90"], "properties": ["ticktocks, clock, ticktocks briefly", "car, tires, slows"], "captions_pred_video": [null, "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["a ticktock of a clock", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a stream of water runs briefly"], "sample_ids": ["tEE3MpBt1sg", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], 
"properties": ["drill, something, laugh", "stream, water, run"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "people applaud and hoot and chat quietly"], "sample_ids": ["viuTg1M-dqg", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["two men, speak, follow", "people, applaud, hoot"], "captions_pred_video": ["footage of water coming out of a hole in the ground", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["an engine revs and a turning noise is made", "a train horn blares as a train passes, then fades"], "sample_ids": ["tOSWIURC-4", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["noise, engine, revs", "blares, fades, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a lawn 
mower is running ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train?", "label": 1}, {"captions": ["a clock ticktocks briefly", "an infant crying as a woman laughs"], "sample_ids": ["u7C-AEBQM", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["ticktocks, clock, ticktocks briefly", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a ticktock of a clock", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman and man speak while food is frying", "a small voice speaks, music plays followed by a double whoosh, and then a bell dings"], "sample_ids": ["zk-xJGQU8-4", "tQWGZLItBXk"], "start_seconds": ["130", "170"], "properties": ["food, man, woman", "voice, music, whoosh"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "worms revolution screenshots"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity has a voice speaking?", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vh30P49Po6s", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["loud, continuous, quacks", "a woman, laughs, animal"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking and a baby is crying"], "question": "which entity is not continuous", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tK4VlLsNxak", "tiDFTC-5vU"], "start_seconds": ["120", "30"], 
"properties": ["An adult male speaks, dials, and speaks into a rotary phone", "male, duck, laugh"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", null], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man is speaking and ducks are quacking"], "question": "which entity is a spoof", "label": 1}, {"captions": ["water runs into a sink while men speak", "water splashes as an animal walks through"], "sample_ids": ["vzceMbklWc", "w1ir-sZ3Im8"], "start_seconds": ["180", "90"], "properties": ["water, sink, run", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["water is running and a man is speaking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wqN6IIHw3po", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["rain, surface, fall", "engine, accelerate, idle"], "captions_pred_video": ["in your own words what is happening in this screenshot? 
blood splattered all over the place", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking and water is splashing", "an engine is idling"], "question": "which entity is a moving object", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a man speaks as a motor runs in the background"], "sample_ids": ["wvKpEYswXO0", "xZepNM9qcRA"], "start_seconds": ["150", "30"], "properties": ["sound, water, running", "background, motor, run"], "captions_pred_video": ["of the person preparing food in the kitchen", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "dishes cling together then a man begins to speak"], "sample_ids": ["x9JovgqUcs", "sQGXqGcwOTc"], "start_seconds": ["500", "3"], "properties": ["a, man, speaks, keyboard", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man speaks and types on a keyboard", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a woman and man speak while food is frying", "a duck quacks loudly and continuously"], "sample_ids": ["zk-xJGQU8-4", "vh30P49Po6s"], "start_seconds": ["130", "30"], "properties": ["food, man, woman", "loud, continuous, quacks"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "a duck is quacking loudly"], "question": "which entity is speaking", 
"label": 0}, {"captions": ["birds chirp and a dog breathes heavily", "water splashes as an animal walks through"], "sample_ids": ["y2ZBGpgbhHM", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["dog, chirp, breathe", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["birds chirping and a dog panting", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["yJ0TePmaOo", "w5W5Kqtc8E"], "start_seconds": ["390", "100"], "properties": ["two hard objects, man, speak", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running?", "label": 1}, {"captions": ["continuous sneezing together with speech", "winds blows roughly as a vehicle races past"], "sample_ids": ["x4dZyf9Gbj0", "xjvTpk2Zpr8"], "start_seconds": ["130", "70"], "properties": ["continuous, sneeze, speech", "wind, blows, vehicle"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman sneezes and speaks", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a man speaks while playing a video game on a keyboard", "water flows and trickles"], "sample_ids": ["tw76HGONaKg", "tB7hWb9gTuQ"], "start_seconds": ["570", "30"], "properties": ["A, game, keyboard", "water, flow, trickle"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of 
time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["water rushes by", "a man speaks followed by another man speaking outside"], "sample_ids": ["x-PeY8Yb8M4", "viuTg1M-dqg"], "start_seconds": ["300", "30"], "properties": ["water, rushes, by", "two men, speak, follow"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zl9Dqx-j7q4", "vb1fPSDI4c"], "start_seconds": ["6", "30"], "properties": ["engine, laugh, loud", "multiple, people, yell"], "captions_pred_video": ["footage of a man driving a car in the dark", null], "captions_pred_audio": ["a jet engine roars ", "a crowd of people are talking and laughing"], "question": "which entity is quieter", "label": 1}, {"captions": ["a frog croaks as 
other frogs croak in the background", "a train horn blows as it passes by"], "sample_ids": ["yswmmRZFItk", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["background, frog, croak", "horn, blows, train"], "captions_pred_video": ["a close up of a frog in the water", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a frog is croaking", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["a telephone rings and a bird vocalizes", "birds chirp and objects are moved around"], "sample_ids": ["skd2PphS6oI", "yPUYU6t3rwo"], "start_seconds": ["190", "370"], "properties": ["ring, bird, vocalize", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["leaves rustle while man speaks", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["zOZleIRqZm4", "sLUnaPT5gM8"], "start_seconds": ["80", "0"], "properties": 
["leaves, rustle, speak", "loud, laughter, intermittent"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a propeller rotates loudly and intensely"], "sample_ids": ["sxYkFKFIZD0", "ugHJF0hfYkg"], "start_seconds": ["20", "10"], "properties": ["screech, man, door", "loud, intense, propeller"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["w2bYrCVLT60", "uZesmtKZGSw"], "start_seconds": ["120", "250"], "properties": ["ducks, speak, quack", "men, talk, cars"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet 
holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a child speaks", "pigeons vocalize and birds chirp"], "sample_ids": ["yW6FWLSLkx4", "uiS58TNyUiw"], "start_seconds": ["40", "430"], "properties": ["a, child, speaks", "vocalize, bird, chirp"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of the pigeon in the cage"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["someone snores nearby", "a man speaks as horns blow"], "sample_ids": ["spJCm8tD9Zo", "tHyNqRyK34A"], "start_seconds": ["90", "24"], "properties": ["someone snores, nearby, someone", "a, man, speaks"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "being taken from inside a vehicle on the street at night"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a car is honking with background noise "], "question": "which entity is a person", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "a soft wind underscores a woman laughing"], "sample_ids": ["sOa7g-44Dag", "s6DESzUTGjY"], "start_seconds": ["30", "16"], "properties": ["background, man, spray", "wind, laugh, woman"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set 
up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a motorboat is moving with wind noise in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "water flows and trickles"], "sample_ids": ["vZAw4apG0Es", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["background, clock, ticktocks", "water, flow, trickle"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a clock is ticking and people are talking", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "a toilet flushes and water drains"], "sample_ids": ["uiS58TNyUiw", "sfAvvZwdLCY"], "start_seconds": ["430", "20"], "properties": ["audio, man, speaking", "water drains, flushes, water"], "captions_pred_video": ["of the pigeon in the cage", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "people 
speak in a closed space"], "sample_ids": ["tZGN5a7ybxo", "sTpirNYo8vQ"], "start_seconds": ["60", "30"], "properties": ["ring, train, horn", "people, space, speak"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "of a man taking a selfie on a bus"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a man is speaking while a car is revving and accelerating "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["zY3icUyMdh8", "vlS6YMeWAPo"], "start_seconds": ["20", "40"], "properties": ["dog, bark, engine", "sheep, baa, birds"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a stream of water runs briefly"], "sample_ids": ["wEBlkGWVWwE", "x-PeY8Yb8M4"], "start_seconds": ["260", "300"], "properties": ["a, babble, woman", "stream, water, run"], "captions_pred_video": ["shows a person writing on the whiteboard", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a drill runs and two people laugh"], "sample_ids": ["xV7Mg1QucSc", "tEE3MpBt1sg"], "start_seconds": ["14", "50"], "properties": ["alarm, ticktocks, laughs", "two people, laugh, drill"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "footage is blurry due to the smoke in the air"], 
"captions_pred_audio": ["an alarm clock ticks and a woman laughs", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["a clock ticktocks in wind", "paper is crumpling consistently"], "sample_ids": ["yVumC9TGknc", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["ticktocks, clock, wind", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a series of beeps and chirps", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "a duck quacks continuously"], "sample_ids": ["tDlysoZiA1I", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["animal, grunt, multiple", "quacks, continuously, duck"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a duck is quacking loudly"], "question": "which animal is making a noise", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["voJh2gJxXhA", "zj2R0XoFr5k"], "start_seconds": ["50", "50"], "properties": ["music, frog, croak", "airplane, boy, fly"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a man 
speaking with light rustling", "water is sprayed across a hard surface"], "sample_ids": ["zOZleIRqZm4", "sQwlkXjQabo"], "start_seconds": ["80", "10"], "properties": ["light, rustling, man", "water, spray, surface"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a car speeding up in the distance", "a woman speaks and a baby laughs"], "sample_ids": ["u0TrcHhkPQ", "tOj4tdLRaA"], "start_seconds": ["20", "70"], "properties": ["distance, car, speed", "woman, laugh, baby"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a baby laughs and a woman speaks"], "question": "which entity is not moving", "label": 1}, {"captions": ["water gurgles, metal squeaks and the water stops", "a stream of water flows as people talk and wind blows"], "sample_ids": ["x4a9YGIw4ok", "xBxDz0CFVn0"], "start_seconds": ["120", "30"], "properties": ["water, gurgles, stops", "stream, water, flow"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and out of focus"], "captions_pred_audio": ["a toilet flushes and water splashes", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["xjvTpk2Zpr8", "uYT5gxnyMWM"], "start_seconds": ["70", "50"], "properties": ["engine, run, wind", "female, spraying, scream"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a woman is speaking and a baby is 
crying"], "question": "which entity is a person", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zY3icUyMdh8", "vfYTJq7nU"], "start_seconds": ["20", "130"], "properties": ["dog, bark, engine", "rustling, ducks, quack"], "captions_pred_video": ["footage of a bus driving through a residential street at night", null], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a duck quacks and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "someone whistles a tune"], "sample_ids": ["sNB8zxXneIM", "sIXTftIuUgw"], "start_seconds": ["20", "90"], "properties": ["several, quack, cocks", "someone, tune, whistle"], "captions_pred_video": ["a group of geese in a cage", null], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a person whistling a song"], "question": "which entity is a human", "label": 1}, {"captions": ["an animal quacks rapidly", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vh30P49Po6s", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["animal, quacks, rapidly", "a, scream, girl"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "wind blowing followed by a zoom"], "sample_ids": ["w5W5Kqtc8E", "vr8ZXjEBhMQ"], "start_seconds": ["100", "150"], "properties": ["water, splashes, motorboat", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of 
view"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more calm", "label": 1}, {"captions": ["an engine starts and increases in power", "a propeller rotates loudly and intensely"], "sample_ids": ["zjTG0gaGCUI", "ugHJF0hfYkg"], "start_seconds": ["80", "10"], "properties": ["power, increase, engine", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["water splashes as an animal walks through", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["w1ir-sZ3Im8", "wyllXV6PjKo"], "start_seconds": ["90", "30"], "properties": ["animal, water, splashes", "a baby, a woman, a man"], "captions_pred_video": ["footage of a group of people riding horses through a river", null], "captions_pred_audio": ["water splashes and gurgles as people speak", "a woman speaks and a baby cries"], "question": "which entity is a person", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "an engine runs loudly"], "sample_ids": ["y1saVTXsKwc", "vqZuVbG6-HI"], "start_seconds": ["80", "130"], "properties": ["a, dog, talk", "loud, engine, run"], "captions_pred_video": ["a dog playing with a pink ball", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a dog barks and a man speaks", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "a whistling owl calls out repeatedly and insects screech"], "sample_ids": ["sSMl2vc3ek", "w6RTHR6AeAg"], "start_seconds": ["20", "40"], "properties": ["loud, multiple, distance", "call, owl, screech"], "captions_pred_video": [null, null], 
"captions_pred_audio": ["a person snoring loudly", "an owl hoots and mechanisms operate "], "question": "which entity is a bird", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "some tunes played by whistling"], "sample_ids": ["wTideSjRFS0", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["food, sizzle, woman", "tune, play, whistling"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a person whistling a song"], "question": "which entity is playing tunes", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "water flows and trickles"], "sample_ids": ["yRx9txMcBl0", "tB7hWb9gTuQ"], "start_seconds": ["40", "30"], "properties": ["accelerates, tires, squeals", "water, flow, trickle"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a car is revving its engine and skidding ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["birds chirp as a bell rings", "several insects fly while two men talk"], "sample_ids": ["ziUT9IFTkjg", "s-T9OVOiMLo"], "start_seconds": ["10", "330"], "properties": ["chirp, bell, ring", "several, fly, 
men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["zuua6-5goWw", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["birds, chirp, quiet, man, speaks", "a woman, a television program, a bird"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program playing?", "label": 1}, {"captions": ["food is frying while a woman speaks", "water flows as men speak and yell"], "sample_ids": ["yhQ2Lg-7qDY", "vJ7JPEFhyLA"], "start_seconds": ["130", "16"], "properties": ["food, woman, speak", "water, flow, men"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a man is speaking while the wind is blowing and water is 
splashing"], "question": "which entity is a video of a woman speaking?", "label": 0}, {"captions": ["an airplane engine roars increasingly louder", "paper is crumpling consistently"], "sample_ids": ["vBslzh7saPw", "v5cSxLaHADY"], "start_seconds": ["90", "0"], "properties": ["engine, roar, louder", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a jet engine roars and accelerates ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "an airplane engine runs"], "sample_ids": ["vW4x7S1VfQc", "yVPZ2MNWpms"], "start_seconds": ["150", "0"], "properties": ["clacking, oil, woman", "engine, airplane, runs"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["food sizzles in a frying pan", "a car is driving by on the road "], "question": "which entity is a machine", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a propeller rotates loudly and intensely"], "sample_ids": ["yYEVLuqEytU", "ugHJF0hfYkg"], "start_seconds": ["40", "10"], "properties": ["animal, pig, background", "loud, intense, propeller"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yJ0TePmaOo", "sSMl2vc3ek"], "start_seconds": ["390", "20"], "properties": 
["two hard objects, man, speak", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "people speak as gunfire rings out"], "sample_ids": ["zj2R0XoFr5k", "wqTCwqVRDlk"], "start_seconds": ["50", "80"], "properties": ["airplane, boy, fly", "gunfire, ring, speak"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["a woman sneezes then speaks", "some men converse over an engine running"], "sample_ids": ["x4dZyf9Gbj0", "sCiy7QS1U"], "start_seconds": ["130", "300"], "properties": ["sneezes, speaks, woman", "men, converse, engine"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a woman sneezes and speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a woman?", "label": 0}, {"captions": ["a goat bleats and someone makes a calling noise", "a car accelerates and wind blows"], "sample_ids": ["vlS6YMeWAPo", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["noise, bleat, call", "accelerates, wind, blows"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", null], "captions_pred_audio": ["a goat bleats and birds chirp", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a dog barks and whimpers", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["sShpyu2l4YQ", 
"tDVADusiIoc"], "start_seconds": ["0", "60"], "properties": ["barks, whimpers, dog", "water, radio, man"], "captions_pred_video": ["the puppies are playing with a toy", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "wind blowing followed by a zoom"], "sample_ids": ["zgUgkpk78xU", "vr8ZXjEBhMQ"], "start_seconds": ["70", "150"], "properties": ["horn, bells, ring", "wind, blow, zoom"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "wind blowing followed by a zoom"], "sample_ids": ["sTpirNYo8vQ", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["a, tone, fast", "wind, blow, zoom"], "captions_pred_video": ["of a man taking a selfie on a bus", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "winds blows roughly as a vehicle races past"], "sample_ids": ["s6DESzUTGjY", "xjvTpk2Zpr8"], "start_seconds": ["16", "70"], "properties": ["wind, laugh, woman", "wind, blows, vehicle"], "captions_pred_video": ["how to 
set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a jet engine roars and wind blows "], "question": "which entity is more calm", "label": 0}, {"captions": ["an insect buzzes around continuously", "a cat meows as a young woman speaks"], "sample_ids": ["v25l1jef3JY", "x5cuQjOdM3E"], "start_seconds": ["0", "30"], "properties": ["buzzes, continuously, insect", "cat, meows, young woman"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a cat meows and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["some clanking with distant murmuring", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["uMTTDZ2mb4", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["clanking, murmuring, distant", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["people are talking and a car is driving 
by with wind noise in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["people speak then an engine runs", "dishes cling together then a man begins to speak"], "sample_ids": ["uMTTDZ2mb4", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["engine, run, people", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["a person screams glaringly", "paper is crumpling consistently"], "sample_ids": ["xC8kbrKJmco", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["glaringly, screams, person", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a goat is bleating ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["birds coo incessantly", "a woman speaks and then a man speaks"], "sample_ids": ["yZrFNS7GFBQ", "vbpKkWvfOu4"], "start_seconds": ["30", "560"], "properties": ["coo, bird, incessant", "a, man, speaks"], "captions_pred_video": ["of the bird in the cage", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["an owl hoots in the background ", "a woman is speaking and a man is speaking"], "question": "which entity is a human speaking?", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "a male is speaking and a duck quacks as others laugh"], "sample_ids": 
["tiDFTC-5vU", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["male, duck, laugh", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a man is speaking and ducks are quacking"], "question": "which entity is speaking and a duck quacks as others laugh", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a child speaks in closed space"], "sample_ids": ["x5cuQjOdM3E", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["cat, meows, young woman", "child, space, speak"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a train engine runs and a horn blows", "water pouring and bubbling"], "sample_ids": ["zPX9o1uDiI", "uyRfq-jKPpo"], "start_seconds": ["40", "50"], "properties": ["engine, horn, run", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing 
", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "a car speeds away loudly followed by a car revving loudly and driving away while outside"], "sample_ids": ["sncRqQ67iJU", "sjlVMgdGSK0"], "start_seconds": ["460", "30"], "properties": ["loud, repeatedly, man", "car, revving, loudly"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a person is snoring", "a car accelerates and revs its engine "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["xl2PIWyXaM", "xjvTpk2Zpr8"], "start_seconds": ["160", "70"], "properties": ["chirp, man, younger person", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["birds are chirping and people are talking", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["water is sprayed across a hard surface", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sQwlkXjQabo", "tdWhHV3X25Q"], "start_seconds": ["10", "60"], "properties": ["water, spray, surface", "applause, audience, yells"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "water flows and trickles"], "sample_ids": ["y8WEcpOlT3I", "tB7hWb9gTuQ"], "start_seconds": ["40", "30"], "properties": ["wind, speak, buffeting", "water, flow, 
trickle"], "captions_pred_video": ["on how to use a sewing machine youtube", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "vehicles pass by on a roadway"], "sample_ids": ["u7C-AEBQM", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["ticks, rhythmic, quiet", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a ticktock of a clock", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a vehicles accelerate quickly and someone laughs"], "sample_ids": ["xZepNM9qcRA", "uWPRNLnpy7Y"], "start_seconds": ["30", "10"], "properties": ["background, motor, run", "accelerate, laugh, vehicle"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "is taken from a car driving down the street"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a man speaks as a motor runs in the background"], "sample_ids": ["w5W5Kqtc8E", "xZepNM9qcRA"], "start_seconds": ["100", "30"], "properties": ["water, splashes, motorboat", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["motors rev and run loudly as a 
person laughs", "a man speaks as a car is passing by"], "sample_ids": ["zl9Dqx-j7q4", "sK4u5T8hW78"], "start_seconds": ["6", "30"], "properties": ["motors rev, laugh, loudly", "a, car, pass"], "captions_pred_video": ["footage of a man driving a car in the dark", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a jet engine roars ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tiDFTC-5vU", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["male, duck, laugh", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["voJh2gJxXhA", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["music, frog, croak", "a woman, laughs, animal"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a woman is speaking and a baby is 
crying"], "question": "which entity has a frog in it?", "label": 0}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zALy31PjDl0", "tiDFTC-5vU"], "start_seconds": ["21", "30"], "properties": ["a man, a vehicle, a horn", "male, duck, laugh"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", null], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a man is speaking and ducks are quacking"], "question": "which entity has a duck?", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "water flows and trickles"], "sample_ids": ["vfYTJq7nU", "tB7hWb9gTuQ"], "start_seconds": ["130", "30"], "properties": ["rustling, ducks, quack", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a duck quacks and a woman speaks", "water is splashing and gurgling"], "question": "which entity is a video of water flowing?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["tOSWIURC-4", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["engine, work, nearby", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a lawn mower is running ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a duck quacks continuously", "a train horn blows as it passes by"], "sample_ids": ["vh30P49Po6s", "zVacuqSb4LI"], "start_seconds": ["30", "30"], "properties": ["quacks, continuously, duck", "horn, blows, train"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "by 
mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a duck is quacking loudly", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "men speak and a nozzle sprays liquid"], "sample_ids": ["s59PfAghdkM", "wRV8yMk886E"], "start_seconds": ["0", "0"], "properties": ["bird, chirp, background, horse, neigh", "liquid, spray, nozzle"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "two cars are parked in a parking lot at night"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a man speaks followed by a loud burst"], "question": "which entity is more likely to be used in a science class", "label": 1}, {"captions": ["humming and rattling of an engine idling as it revs", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xMXvkIcaG0Y", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["sound, humming, rattling", "airplane, boy, fly"], "captions_pred_video": ["footage of a car's hood being opened up to reveal the engine 
underneath the hood", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["an engine is revving and accelerating ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["a small engine spits as it runs", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sZvwOuuPGP0", "zl9Dqx-j7q4"], "start_seconds": ["50", "6"], "properties": ["spits, engine, runs", "engine, laugh, loud"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a medium engine is running ", "a jet engine roars "], "question": "which entity is followed by a man laughing", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "paper is crumpling consistently"], "sample_ids": ["uYT5gxnyMWM", "v5cSxLaHADY"], "start_seconds": ["50", "0"], "properties": ["female, spraying, scream", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "paper is crumpled and crinkled"], "question": "which entity is a video of a person speaking", "label": 0}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "people cheer as a vehicle engine revs"], "sample_ids": ["sU53zg9Jp7s", "xjhAnI2q6hM"], "start_seconds": ["380", "6"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "engine revs, vehicle, people"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and 
music in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "an insect buzzes around continuously"], "sample_ids": ["wqUmIEzuNz4", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["frog, bird, vocalize", "buzzes, continuously, insect"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a cat meows and rustles", "a fly is buzzing around a microphone "], "question": "which entity is not a frog?", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["vZAw4apG0Es", "xV7Mg1QucSc"], "start_seconds": ["30", "14"], "properties": ["background, tick, repeat", "alarm, ticktocks, laughs"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a clock is ticking and people are talking", "an alarm clock ticks and a woman laughs"], "question": "which entity has a tick that repeats", "label": 0}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a woman speaks happily and an animal chirps"], "sample_ids": ["wvKpEYswXO0", "uWAAAL4CIoc"], "start_seconds": ["150", "0"], "properties": ["sound, water, running", "a woman, chirps, animal"], "captions_pred_video": ["of the person preparing food in the kitchen", null], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a woman is speaking and a dog is barking "], "question": "which entity has a woman speaking softly?", "label": 0}, {"captions": ["speaking following by laughing and clapping", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["u2f5NpsoHBg", "zl9Dqx-j7q4"], 
"start_seconds": ["30", "6"], "properties": ["person, laugh, clap", "engine, laugh, loud"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a car accelerates and wind blows"], "sample_ids": ["v7jJS8aAyA", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["wind, blows, loudly", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a race car accelerates and revs its engine "], "question": "which vehicle is moving faster", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a stream of water flows quickly"], "sample_ids": ["wyllXV6PjKo", "wbHTKEJZyhc"], "start_seconds": ["30", "20"], "properties": ["a kid, talk, cry", "stream, water, flow"], "captions_pred_video": [null, "footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the 
background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["a woman speaks and a baby cries", "a waterfall is flowing and people are speaking "], "question": "which entity is moving", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vhJWZheqaE", "sLUnaPT5gM8"], "start_seconds": ["0", "0"], "properties": ["water drains unevenly, toilet flushes, water drains", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a toilet is flushed", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["smGI3C1NZc", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["water, drain, toilet", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking while food is frying in the background"], "question": "which entity is about a woman talking?", "label": 1}, {"captions": ["a beep occurs briefly", "bird squawks are accompanied by a man and woman speaking"], "sample_ids": ["xtWeJ56-U-g", "wqZ135Ssz0"], "start_seconds": ["20", "60"], "properties": ["beep, occur, briefly", "man, woman, squawks"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 
79 80 81 82 8", null], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is accompanied by a man and woman speaking", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "some men converse over an engine running"], "sample_ids": ["wyllXV6PjKo", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["a kid, talk, cry", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman speaks and a baby cries", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a kid?", "label": 0}, {"captions": ["people converse in the distance as a clock ticks", "an engine works in idle nearby followed by a man talking"], "sample_ids": ["vZAw4apG0Es", "wqADXCzngMw"], "start_seconds": ["30", "340"], "properties": ["people, clock, converse", "engine, idle, man"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "of a man working on a vintage volkswagen beetle"], "captions_pred_audio": ["a clock is ticking and people are talking", "a lawn mower is running and a man is speaking "], "question": "which entity is about a clock?", "label": 0}, {"captions": ["race cars go around a track as a man commentates", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["uZesmtKZGSw", "yajyRTUQk3U"], "start_seconds": ["250", "400"], "properties": ["car, track, man", "a woman, something, fried"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["an adult woman and an adult man speak", "someone is typing on a computer keyboard"], "sample_ids": ["zTLVJCo4WEE", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["two people, adult, speak", "keyboard, type, computer"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["wind noise makes sound into a microphone", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["w8uLijTqtlU", "yks4cLgIDMc"], "start_seconds": ["70", "170"], "properties": ["wind, microphone, noise", "background, speaking, child"], "captions_pred_video": ["footage is blurry and shaky", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background?", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "an engine runs loudly"], "sample_ids": ["sa6TLVbooCc", "vqZuVbG6-HI"], "start_seconds": ["240", "130"], "properties": ["people, laugh, child", "loud, engine, run"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a lawn mower is running and men are speaking "], "question": "which entity is 
louder", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "multiple people speak and children yell while water gurgles"], "sample_ids": ["yajyRTUQk3U", "vb1fPSDI4c"], "start_seconds": ["400", "30"], "properties": ["noise, woman, speak", "multiple, people, yell"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["wind blowing followed by a zoom", "water is sprayed across a hard surface"], "sample_ids": ["vr8ZXjEBhMQ", "sQwlkXjQabo"], "start_seconds": ["150", "10"], "properties": ["wind, blow, zoom", "water, spray, surface"], "captions_pred_video": ["is taken from a motorcycle's point of view", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "a duck quacks loudly and continuously"], "sample_ids": ["wqUmIEzuNz4", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["frog, bird, vocalize", "loud, continuous, quacks"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a cat meows and rustles", "a duck is quacking loudly"], "question": "which animal is making a noise", "label": 1}, {"captions": ["a man speaks uses a drill", "roadway noise occurs and a truck accelerates"], "sample_ids": ["x5eIC7S0fbg", "tgbONvsP47Y"], "start_seconds": ["60", "0"], "properties": ["A man is speaking, uses a drill, and is a tool", "noise, truck, accelerate"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "footage of a fire truck entering a garage"], 
"captions_pred_audio": ["a man is speaking and using a power tool ", "a car is driving on the road "], "question": "which is a tool", "label": 0}, {"captions": ["a machine runs continuously", "a stream of water runs briefly"], "sample_ids": ["wdXV3Pv0jiY", "x-PeY8Yb8M4"], "start_seconds": ["11", "300"], "properties": ["machine, running, continuously", "stream, water, run"], "captions_pred_video": ["footage is blurry and shaky", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a car is driving on a wet road "], "question": "which entity is running continuously", "label": 0}, {"captions": ["people clap and speak in the distance", "winds blows roughly as a vehicle races past"], "sample_ids": ["wwyfGO2J4", "xjvTpk2Zpr8"], "start_seconds": ["90", "70"], "properties": ["clap, distance, speak", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["a person is snoring while sleeping", "an engine runs loudly"], "sample_ids": ["vJrjSeP17yE", "vqZuVbG6-HI"], "start_seconds": ["40", "130"], "properties": ["a person is sleeping, snoring, person", "loud, engine, run"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a person snoring loudly", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "birds chirp and objects are moved around"], "sample_ids": ["vVhthZ45k3Y", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["cat, purr, hiss", "birds chirp, objects are moved around, birds"], "captions_pred_video": 
["footage is blurry and out of focus", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a person is snoring while sleeping", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["vJrjSeP17yE", "vlS6YMeWAPo"], "start_seconds": ["40", "40"], "properties": ["a person is sleeping, snoring, person", "sheep, baa, birds"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a person snoring loudly", "a goat bleats and birds chirp"], "question": "which entity is a person", "label": 0}, {"captions": ["an animal growls followed by birds chirping", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["y2ZBGpgbhHM", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["animal, growl, bird", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["birds chirping and a dog panting", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["t25U-v4k4ts", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["a, chirps, bird", "animal, grunts, snorts"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a woman is speaking and a baby is crying"], "question": "which entity has a bird chirp?", "label": 0}, {"captions": ["a person speaks briefly", "wind blowing followed by a zoom"], "sample_ids": ["zOZleIRqZm4", 
"vr8ZXjEBhMQ"], "start_seconds": ["80", "150"], "properties": ["person, talk, brief", "wind, blow, zoom"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not talking", "label": 1}, {"captions": ["some men converse over an engine running", "a man speaks as a car is passing by"], "sample_ids": ["sCiy7QS1U", "sK4u5T8hW78"], "start_seconds": ["300", "30"], "properties": ["men, converse, engine", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "some tunes played by whistling"], "sample_ids": ["w2bYrCVLT60", "u6BnG6YZqJ4"], "start_seconds": ["120", "0"], "properties": ["ducks, speak, quack", "tune, play, whistling"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["a small engine spits as it runs", "a propeller rotates loudly and intensely"], "sample_ids": ["sZvwOuuPGP0", 
"ugHJF0hfYkg"], "start_seconds": ["50", "10"], "properties": ["spits, engine, runs", "loud, intense, propeller"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a medium engine is running ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a dog barks and whimpers", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sShpyu2l4YQ", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["barks, whimpers, dog", "music, gunfire, explosion"], "captions_pred_video": ["the puppies are playing with a toy", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a dog is barking and growling", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["wind blows strongly and a young man speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vs65y4qmyBE", "su6FAOcOA8c"], "start_seconds": ["340", "4"], "properties": ["wind, blows, strongly", "engine, idle, woman"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "birds chirp in the background while a horse neighs followed by a girl speaking"], "sample_ids": ["zcDwZ6W7E3E", "s59PfAghdkM"], "start_seconds": ["180", "0"], "properties": ["a, man, speak", "bird, chirp, background, horse, neigh"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "is an anime 
scene featuring two people and a horse in the foreground and a fence in the background"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "birds are chirping a horse is neighing and a woman is speaking "], "question": "which entity has a horse neighing?", "label": 1}, {"captions": ["a baby laugh at a sputter", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sLUnaPT5gM8", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["laugh, sputter, baby", "People, motor, brakes"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", null], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 1}, {"captions": ["someone is snoring while sleeping", "water flows and trickles"], "sample_ids": ["ujMt0-D-x2k", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["snore, sleep, someone", "water, flow, trickle"], "captions_pred_video": ["of the dog playing with a toy on the floor", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a person is snoring loudly", "water is splashing and gurgling"], "question": "which entity is not a person", "label": 1}, {"captions": ["an animal quacks rapidly", "people cheer as a vehicle engine revs"], "sample_ids": ["vh30P49Po6s", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["animal, quacks, rapidly", "engine revs, vehicle, people"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a duck is quacking loudly", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks then rubs two hard 
objects together", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["yJ0TePmaOo", "y8WEcpOlT3I"], "start_seconds": ["390", "40"], "properties": ["two hard objects, man, speak", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a man is speaking with wind noise in the background "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["birds chirps while a siren signals in the distance", "a stream runs then someone speaks"], "sample_ids": ["uKCSGgof8gI", "wbHTKEJZyhc"], "start_seconds": ["12", "20"], "properties": ["chirps, distance, signal", "stream, run, someone"], "captions_pred_video": ["footage of a street in a small town on a sunny day", "footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["a truck 
is accelerating and revving its engine ", "a waterfall is flowing and people are speaking "], "question": "which entity is a stream?", "label": 1}, {"captions": ["someone snores nearby", "people applaud and hoot and chat quietly"], "sample_ids": ["spJCm8tD9Zo", "wwyfGO2J4"], "start_seconds": ["90", "90"], "properties": ["someone snores, nearby, someone", "people, applaud, hoot"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man talks as several small engines run", "an infant crying frantically"], "sample_ids": ["u9A6VZQCZpU", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a man speaks as a car is passing by"], "sample_ids": ["vzxHnu-SFEw", "sK4u5T8hW78"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "a, car, pass"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with 
scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which object is moving", "label": 0}, {"captions": ["people speak softly as food sizzles", "paper folding and crinkling"], "sample_ids": ["yhQ2Lg-7qDY", "zPpG3RD8lSs"], "start_seconds": ["130", "20"], "properties": ["food, sizzle, speak", "paper, fold, crinkle"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of 
paper youtube how to make a valentine's"], "captions_pred_audio": ["a faucet is running and a man is speaking", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "people speak as gunfire rings out"], "sample_ids": ["y8WEcpOlT3I", "wqTCwqVRDlk"], "start_seconds": ["40", "80"], "properties": ["harsh, wind, blows", "gunfire, ring, speak"], "captions_pred_video": ["on how to use a sewing machine youtube", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["bees buzz and wind blows", "dishes cling together then a man begins to speak"], "sample_ids": ["tMJne1a4AFI", "sQGXqGcwOTc"], "start_seconds": ["0", "3"], "properties": ["bees buzz, wind blows, bees", "cling, speak, dishes"], "captions_pred_video": ["a swarm of bees on the ground", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a swarm of bees buzzing around", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a frog croaks as other frogs croak in the background"], "sample_ids": ["uEU-Hg5MTN8", "yswmmRZFItk"], "start_seconds": ["27", "0"], "properties": ["a woman, laughs, animal", "background, frog, croak"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a close up of a frog in the water"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["a toilet door squeaks as it is opened", "a man speaks as a car is passing by"], "sample_ids": ["sdXV-ylviw", "sK4u5T8hW78"], "start_seconds": ["190", "30"], 
"properties": ["door, toilet, squeaks", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["people speak as gunfire rings out", "a woman speaks happily and an animal chirps"], "sample_ids": ["wqTCwqVRDlk", "uWAAAL4CIoc"], "start_seconds": ["80", "0"], "properties": ["gunfire, ring, speak", "a woman, chirps, animal"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", null], "captions_pred_audio": ["a man is speaking and a gun is fired", "a woman is speaking and a dog is barking "], "question": "which entity is more calm", "label": 1}, {"captions": ["a helicopter engine idles continuously", "females talk and laugh over gusting wind"], "sample_ids": ["ugHJF0hfYkg", "un9VQlzgZM"], "start_seconds": ["10", "5"], "properties": ["engine, idle, continuously", "females, talk, laugh"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is not a person?", "label": 0}, {"captions": ["a man speaks and is typing on a keyboard", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["x9JovgqUcs", "wDVMhEdTiVw"], "start_seconds": ["500", "30"], "properties": ["a, man, speaks, keyboard", "gun, shoot, water"], "captions_pred_video": 
[null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["a weapon fires multiple times", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sMC07Ucy7kg", "vfYTJq7nU"], "start_seconds": ["10", "130"], "properties": ["weapon, fire, multiple", "rustling, ducks, quack"], "captions_pred_video": ["footage is from a car's point of view", null], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a duck quacks and a woman speaks"], "question": "which entity is more likely to be used in a hunting situation", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "water pouring and bubbling"], "sample_ids": ["ukg5L09Wpvo", "uyRfq-jKPpo"], "start_seconds": ["150", "50"], "properties": ["clickety-clack, train, whistle", "water, bubbles, pouring"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], 
"captions_pred_audio": ["a train blows its whistle and blows its horn ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "water splashes and a motorboat passes as people yell"], "sample_ids": ["vveS8HT7Uog", "w5W5Kqtc8E"], "start_seconds": ["100", "100"], "properties": ["a man, objects, speak", "water, splashes, motorboat"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is more active", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vBHyYJ8pL0", "vbZ-0lGPneg"], "start_seconds": ["2", "30"], "properties": ["noise, door, opening", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a video of a door opening and closing?", "label": 0}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wnpJndXuxLc", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["beeps, loud, whistle", "a woman, laughs, animal"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more quiet", 
"label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["viuTg1M-dqg", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["two men, speak, follow", "People, motor, brakes"], "captions_pred_video": ["footage of water coming out of a hole in the ground", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has more people", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["uoGVs9yUqY4", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["multiple, vocalize, wind", "rooster, crow, background, men"], "captions_pred_video": ["for how to make a wooden shed door youtube", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a rooster?", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "plastic is tapped on while someone speaks"], "sample_ids": ["sTpirNYo8vQ", "wvKpEYswXO0"], "start_seconds": ["30", "150"], "properties": ["a, tone, fast", "plastic, tap, speak"], "captions_pred_video": ["of a man taking a selfie on a bus", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a woman speaks as frying food sizzles", "a toilet flushes and a female speaks"], "sample_ids": ["wTideSjRFS0", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": 
["food, sizzle, woman", "female, flushes, toilet"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a toilet flushes and a man speaks"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "a man speaks as a motor runs in the background"], "sample_ids": ["uOpoD0gGXcs", "xZepNM9qcRA"], "start_seconds": ["120", "30"], "properties": ["chirps, woman, bird", "background, motor, run"], "captions_pred_video": ["a herd of cows grazing in the field", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a man speaking to a motor?", "label": 1}, {"captions": ["a dog barks and whimpers", "dogs bark as an engine runs and a person whistles"], "sample_ids": ["sShpyu2l4YQ", "zY3icUyMdh8"], "start_seconds": ["0", "20"], "properties": ["barks, whimpers, dog", "dog, bark, engine"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a dog is barking and growling", "a car is driving and dogs are barking and squealing "], "question": "which dog barks and whimpers", "label": 0}, {"captions": ["a woman talking as an infant is crying", "a man speaks then multiple motorcycles pass by"], "sample_ids": ["tMbMDvT50j8", "zcDwZ6W7E3E"], "start_seconds": ["12", "180"], "properties": ["a, talk, infant", "a, man, speak"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "2 people riding motorcycles down a mountain road with trees lining the sides of the road"], "captions_pred_audio": ["a baby cries and a woman speaks", 
"a man is speaking while a car accelerates and revs its engine "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a weapon fires multiple times"], "sample_ids": ["zcDwZ6W7E3E", "sMC07Ucy7kg"], "start_seconds": ["180", "10"], "properties": ["man, speak, motorcycles", "weapon, fire, multiple"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage is from a car's point of view"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["water runs into a sink while men speak", "running water in a faucet with some clinks"], "sample_ids": ["vzceMbklWc", "zNRChLjqcU"], "start_seconds": ["180", "220"], "properties": ["water, sink, run", "water, faucet, run"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and a man is speaking", "water is running from a faucet into a sink"], "question": "which entity has water running into it", "label": 0}, {"captions": ["a man speaks as a car is passing by", "a car accelerates and wind blows"], "sample_ids": ["sK4u5T8hW78", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["a, car, pass", "accelerates, wind, blows"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a race car 
accelerates and revs its engine "], "question": "which car is moving faster", "label": 1}, {"captions": ["an airplane engine spools and people speak", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wTjoRj1se3U", "sLUnaPT5gM8"], "start_seconds": ["390", "0"], "properties": ["airplane, engine, spool", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a jet engine is running and people are talking", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["people speak and tapping occurs", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tFCUUGdREgA", "zj2R0XoFr5k"], "start_seconds": ["70", "50"], "properties": ["people, tap, speak", "airplane, boy, fly"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a plane flying by?", "label": 1}, {"captions": ["people speak as gunfire rings out", "a toilet flushes and a female speaks"], "sample_ids": ["wqTCwqVRDlk", "yaln9y8I7ms"], "start_seconds": ["80", "230"], "properties": ["gunfire, ring, speak", "female, flushes, toilet"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a toilet flushes and a man speaks"], "question": "which entity is more likely to be in a bathroom?", "label": 1}, {"captions": ["water rushes by", "rustling occurs, ducks quack and water splashes, followed by an 
adult female and adult male speaking and duck calls being blown"], "sample_ids": ["x-PeY8Yb8M4", "vfYTJq7nU"], "start_seconds": ["300", "130"], "properties": ["water, rushes, by", "rustling, ducks, quack"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", null], "captions_pred_audio": ["a car is driving on a wet road ", "a duck quacks and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "people cheer as a vehicle engine revs"], "sample_ids": ["sQGXqGcwOTc", "xjhAnI2q6hM"], "start_seconds": ["3", "6"], "properties": ["audio, kid, giggles", "engine revs, vehicle, people"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["ukg5L09Wpvo", "zj2R0XoFr5k"], "start_seconds": ["150", "50"], "properties": ["a train, a horn, a bell", "airplane, boy, fly"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vVhthZ45k3Y", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["cat, purr, hiss", "gun, shoot, water"], 
"captions_pred_video": ["footage is blurry and out of focus", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause injury", "label": 1}, {"captions": ["water splashes as an animal walks through", "three men talk while wind blows and some liquid flows"], "sample_ids": ["w1ir-sZ3Im8", "vJ7JPEFhyLA"], "start_seconds": ["90", "16"], "properties": ["animal, water, splashes", "three men, wind, flow"], "captions_pred_video": ["footage of a group of people riding horses through a river", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["water splashes and gurgles as people speak", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be a video", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a woman speaks and then a man speaks"], "sample_ids": ["yPUYU6t3rwo", "vbpKkWvfOu4"], "start_seconds": ["370", "560"], "properties": ["birds chirp, objects are moved around, birds", "a, man, speaks"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["insects buzz and a man speaks", "a woman is speaking and a man is speaking"], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "water splashes as an animal walks through"], "sample_ids": ["vXlk0lIQBFo", "w1ir-sZ3Im8"], "start_seconds": ["470", "90"], "properties": ["wind, speak, vocalize", "animal, water, splashes"], "captions_pred_video": ["- a woman and 
two donkeys in a fenced in area", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vh30P49Po6s", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["loud, continuous, quacks", "gun, shoot, water"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a duck is quacking loudly", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is louder", "label": 0}, {"captions": ["running water in a faucet with some clinks", "running water in a faucet with some clinks"], "sample_ids": ["zNRChLjqcU", "zNRChLjqcU"], "start_seconds": ["220", "220"], "properties": ["water, faucet, run", "water, faucet, run"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running from a faucet into a sink", "water is running from a faucet into a sink"], "question": "which entity is a faucet?", "label": 0}, {"captions": ["an airplane engine spools and people speak", "water splashes as an animal walks through"], "sample_ids": ["wTjoRj1se3U", "w1ir-sZ3Im8"], "start_seconds": ["390", "90"], "properties": ["airplane, engine, spool", "animal, water, splashes"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a jet engine is running and people are talking", "water splashes and gurgles as people speak"], "question": "which entity is a moving object", "label": 1}, {"captions": ["an animal quacks rapidly", "loud intermittent buzzing with intermittent laughter"], "sample_ids": 
["vh30P49Po6s", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["animal, quacks, rapidly", "loud, laughter, intermittent"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a duck is quacking loudly", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "loud ringing of a telephone stops followed by a man speaking and a digital beep"], "sample_ids": ["wz7N8YRy74I", "uzQnlJXBbOM"], "start_seconds": ["30", "50"], "properties": ["rooster, crow, background, people", "ringing, beep, stop"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of a person using a cell phone on a table"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a telephone rings and a man speaks"], "question": "which entity is a recording of a telephone call?", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["uRExseg-0XI", "wwyfGO2J4"], "start_seconds": ["210", "90"], "properties": ["woman, man, water", "people, applaud, hoot"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", null], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "people are clapping and speaking with background noise "], "question": "which entity has more people", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "a car accelerates and wind blows"], "sample_ids": ["sHbXC6na9hg", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["a person, saw, wood", "accelerates, wind, blows"], 
"captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", null], "captions_pred_audio": ["an engine is idling and vibrating", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "a woman speaks as she rubs two objects together"], "sample_ids": ["uOpoD0gGXcs", "vzxHnu-SFEw"], "start_seconds": ["120", "80"], "properties": ["chirps, woman, bird", "two objects, woman, speak"], "captions_pred_video": ["a herd of cows grazing in the field", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which woman is speaking?", "label": 1}, {"captions": ["a man speaks uses a drill", "birds chirp and objects are moved around"], "sample_ids": ["x5eIC7S0fbg", "yPUYU6t3rwo"], "start_seconds": ["60", "370"], "properties": ["A man is speaking, uses a drill, and is a tool", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a person in surgical 
gloves is using a needle to remove a small object from a tooth", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and using a power tool ", "insects buzz and a man speaks"], "question": "which entity is a tool", "label": 0}, {"captions": ["a man speaks uses a drill", "an airplane engine runs"], "sample_ids": ["x5eIC7S0fbg", "yVPZ2MNWpms"], "start_seconds": ["60", "0"], "properties": ["A man is speaking, uses a drill, and is a tool", "engine, airplane, runs"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and using a power tool ", "a car is driving by on the road "], "question": "which entity is a tool", "label": 0}, {"captions": ["a horn honks and then loudly blares", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wnpJndXuxLc", "ukg5L09Wpvo"], "start_seconds": ["50", "150"], "properties": ["horn, honk, loud", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wtDqrBygTcU", "vYkA3cfXp5Q"], "start_seconds": ["30", "30"], "properties": ["man, engine, run", "engine, accelerate, idle"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking and a motor is running", "an engine 
is idling"], "question": "which engine is running", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "loud clanking and banging with brief male speech"], "sample_ids": ["vbpKkWvfOu4", "sWZzXuWYY"], "start_seconds": ["560", "420"], "properties": ["a, woman, man", "male, speech, banging"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a sewing machine runs and a man speaks"], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a man speaks followed by another man speaking outside"], "sample_ids": ["sAam2NqGhLY", "viuTg1M-dqg"], "start_seconds": ["20", "30"], "properties": ["snoring, breathing, child", "two men, speak, follow"], "captions_pred_video": ["of a little girl sleeping on a couch", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a person is snoring", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "people cheer as a vehicle engine revs"], "sample_ids": ["soTOh3zYJfY", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["vehicle, skid, tires", "engine revs, vehicle, people"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a truck is revving its engine and a man is speaking "], "question": "which vehicle is skidding", "label": 0}, {"captions": ["a man speaks then blows a vehicle horn as wind 
blows", "people applaud and hoot and chat quietly"], "sample_ids": ["zALy31PjDl0", "wwyfGO2J4"], "start_seconds": ["21", "90"], "properties": ["a man, a vehicle, a horn", "people, applaud, hoot"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", null], "captions_pred_audio": ["a man is speaking and a car horn is honking", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "people cheer as a vehicle engine revs"], "sample_ids": ["y8WEcpOlT3I", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["wind, speak, buffeting", "engine revs, vehicle, people"], "captions_pred_video": ["on how to use a sewing machine youtube", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sjlVMgdGSK0", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["accelerates, vehicle, race car", "male, duck, laugh"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a stream of water runs briefly"], "sample_ids": ["weDbePuc-Xc", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["music, slaps, human", "stream, water, run"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", 
"a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man speaks as a machine runs", "paper folding and crinkling"], "sample_ids": ["vD6lYD1l0BY", "zPpG3RD8lSs"], "start_seconds": ["330", "20"], "properties": ["a, machine, run", "paper, fold, crinkle"], "captions_pred_video": ["game controller being held in the hands of the person", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of a machine?", "label": 0}, {"captions": ["a motor idles, accelerates, then slows down.", "the revving of an engine throttle followed by a man speaking"], "sample_ids": ["vYkA3cfXp5Q", "tezvROoo4bs"], "start_seconds": ["30", "40"], "properties": ["speed, idle, accelerate", "audio, throttle, speaking"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage of a busy city street with cars parked on both sides 
of the road"], "captions_pred_audio": ["an engine is idling", "a car accelerates and revs while a man speaks "], "question": "which entity is a video of a motor?", "label": 0}, {"captions": ["dishes cling together then a man begins to speak", "a sleeping person emits a gravely snore"], "sample_ids": ["sQGXqGcwOTc", "w2JXXIAdUdg"], "start_seconds": ["3", "10"], "properties": ["cling, speak, dishes", "emits, sleeping, person"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "a close up shot of a person's mouth with a toothbrush in it"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a person snoring and a dog whimpering"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "a woman talking as an infant is crying"], "sample_ids": ["x6ijhqRY38s", "tMbMDvT50j8"], "start_seconds": ["250", "12"], "properties": ["bowl, silverware, man", "a, talk, infant"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "shows a little girl covering her face with her hands while sitting at a table"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a baby cries and a woman speaks"], "question": "which entity is about a person talking?", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "a train horn blows as it passes by"], "sample_ids": ["sncRqQ67iJU", "zVacuqSb4LI"], "start_seconds": ["460", "30"], "properties": ["loud, repeatedly, man", "horn, blows, train"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a person is snoring", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["a man talks while vehicles pass by", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sK4u5T8hW78", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["a, man, talk", "loud, laughter, intermittent"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["someone snores nearby", "a man speaks as a car is passing by"], "sample_ids": ["spJCm8tD9Zo", "sK4u5T8hW78"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "a, car, pass"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a woman speaks over sizzling noise"], "sample_ids": ["uWPRNLnpy7Y", "yajyRTUQk3U"], "start_seconds": ["10", "400"], "properties": ["accelerate, laugh, vehicle", "noise, woman, speak"], "captions_pred_video": ["is taken from a car driving down the street", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person speaking over noise?", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "winds blows roughly as a vehicle races past"], "sample_ids": ["w5W5Kqtc8E", "xjvTpk2Zpr8"], "start_seconds": ["100", "70"], "properties": ["wind, engine, scream", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a jet engine roars and wind blows "], "question": "which entity is about a vehicle racing past?", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "water flows and trickles"], "sample_ids": ["sDSppXIlJrs", "tB7hWb9gTuQ"], "start_seconds": ["27", "30"], "properties": ["microphone, water, wind", "water, flow, trickle"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["the wind is blowing 
and water is splashing", "water is splashing and gurgling"], "question": "which entity is a video of water flowing?", "label": 1}, {"captions": ["a person sniffles and sneezes", "water quietly rushes by while birds chirp in the background"], "sample_ids": ["uRlbY6aoBU", "sYITalLZjj4"], "start_seconds": ["0", "30"], "properties": ["sneezes, sniffles, person", "water, rushes, background, birds"], "captions_pred_video": [null, "two ducks are swimming in the water near each other"], "captions_pred_audio": ["a man is sneezing ", "wind blows and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a baby cries and a woman moans", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["smDKStoHBJo", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["a, cry, woman", "female, spraying, scream"], "captions_pred_video": ["a man holding a crying baby in his arms", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and a baby is crying"], "question": "which entity has a woman spraying?", "label": 1}, {"captions": ["a baby laugh at a sputter", "winds blows roughly as a vehicle races past"], "sample_ids": ["sLUnaPT5gM8", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["laugh, sputter, baby", "wind, blows, vehicle"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "someone whistles a tune"], "sample_ids": ["wwyfGO2J4", "sIXTftIuUgw"], "start_seconds": ["90", "90"], "properties": ["people, applaud, hoot", "someone, tune, whistle"], "captions_pred_video": [null, null], 
"captions_pred_audio": ["people are clapping and speaking with background noise ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a machine beeps continuously"], "sample_ids": ["zuua6-5goWw", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["birds, chirp, quiet, man, speaks", "beeps, machine, continuously"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", null], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a beeping sound is being made "], "question": "which entity is quieter", "label": 0}, {"captions": ["a infant makes noise and is excited", "a dog barks and whimpers"], "sample_ids": ["wIJK3-5y0kA", "sShpyu2l4YQ"], "start_seconds": ["30", "0"], "properties": ["noise, excited, infant", "barks, whimpers, dog"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "the puppies are playing with a toy"], "captions_pred_audio": ["a baby cries and a woman speaks", "a dog is barking and growling"], "question": "which entity is more quiet", "label": 1}, {"captions": ["bees buzz and wind blows", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tMJne1a4AFI", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["bees buzz, wind blows, bees", "engine, laugh, loud"], "captions_pred_video": ["a 
swarm of bees on the ground", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a swarm of bees buzzing around", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "people cheer as a vehicle engine revs"], "sample_ids": ["xZepNM9qcRA", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["background, motor, run", "engine revs, vehicle, people"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a truck is revving its engine and a man is speaking "], "question": "which entity has a motor running in the background?", "label": 0}, {"captions": ["waves crash against a shoreline and wind blows", "a man speaks followed by another man speaking outside"], "sample_ids": ["zdYdyF9-m8U", "viuTg1M-dqg"], "start_seconds": ["7", "30"], "properties": ["wind, crash, shoreline", "two men, speak, follow"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["waves crash and wind blows ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["ul60S8TXDA8", "uYT5gxnyMWM"], "start_seconds": ["60", "50"], "properties": ["sound, distance, bell", "a, scream, girl"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a woman is speaking and a baby is crying"], "question": "which entity 
has a scream?", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "a piece of wood is being placed down and sawed"], "sample_ids": ["zO-LSSY92ZM", "uiItxDsDMFI"], "start_seconds": ["30", "30"], "properties": ["liquid, surface, sound", "wood, piece, saw"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["steam is hissing and hissing", "a saw is being used with background noise "], "question": "which entity is being cut", "label": 1}, {"captions": ["a clock ticktocks", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["v-g-j2uTByM", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["ticktocks, clock, ticktocks", "loud, jet engine, roar"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a clock is ticking loudly", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["children speak and play together", 
"a girl speaks followed by a scream and more girls talking"], "sample_ids": ["yVVP8XvWJTo", "uYT5gxnyMWM"], "start_seconds": ["260", "50"], "properties": ["children, speak, play", "a, scream, girl"], "captions_pred_video": ["footage of a playground at a school or daycare center", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is about a girl speaking followed by a scream?", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "a duck quacks loudly and continuously"], "sample_ids": ["sd7xVssqlw", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["accelerates, tires, squealing", "loud, continuous, quacks"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["bees buzz as wind blows", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tMJne1a4AFI", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["bees, buzz, wind", "male, duck, laugh"], "captions_pred_video": ["a swarm of bees on the ground", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["a small engine spits as it runs", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["sZvwOuuPGP0", "ukg5L09Wpvo"], "start_seconds": ["50", "150"], "properties": ["spits, engine, runs", "clickety-clack, train, whistle"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a 
medium engine is running ", "a train blows its whistle and blows its horn "], "question": "which train whistle keeps going off", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "a man speaks as a car is passing by"], "sample_ids": ["wRBHTgrbiwg", "sK4u5T8hW78"], "start_seconds": ["50", "30"], "properties": ["birds, chirp, cooing", "a, car, pass"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["yYJksgsxx5U", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["audio, woman, silverware", "wind, blow, vehicle"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", null], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "a man speaks followed by another man speaking outside"], "sample_ids": ["w1mlz3Pe4fU", "viuTg1M-dqg"], "start_seconds": ["300", "30"], "properties": ["vocalize, chirp, continuously", "two men, speak, follow"], "captions_pred_video": ["of a bird in a cage", "footage of 
water coming out of a hole in the ground"], "captions_pred_audio": ["birds are chirping and singing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a human", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "an airplane engine spools and people speak"], "sample_ids": ["tDVADusiIoc", "wTjoRj1se3U"], "start_seconds": ["60", "390"], "properties": ["water, radio, man", "airplane, engine, spool"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a jet engine is running and people are talking"], "question": "which entity is a video of an airplane engine spooling?", "label": 1}, {"captions": ["a person snoring", "speaking following by laughing and clapping"], "sample_ids": ["t8tv5YRMJUg", "u2f5NpsoHBg"], "start_seconds": ["0", "30"], "properties": ["a person, snore, loud", "person, laugh, clap"], "captions_pred_video": ["of a man getting his face licked by another man", "is being projected on a screen at the front of the stage"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a woman is speaking and a crowd is clapping"], "question": "which person is more likely to be clapping", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "some men converse over an engine running"], "sample_ids": ["yeFvk9x0wWI", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["clack, bird, chirp", "men, converse, engine"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", null], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation?", "label": 1}, {"captions": ["a cat meows as a young woman speaks", 
"various birds chirp and squeal, and an animal grunts"], "sample_ids": ["x5cuQjOdM3E", "tDlysoZiA1I"], "start_seconds": ["30", "0"], "properties": ["cat, meows, young woman", "animal, grunts, chirps"], "captions_pred_video": ["a black background with an airplane flying in the sky", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a cat meows and a woman speaks", "birds are chirping and a rooster is crowing "], "question": "which entity is a domesticated animal", "label": 0}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "a duck quacks continuously"], "sample_ids": ["wSVhSdj0F0", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["beep, clang, footsteps", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "a infant makes noise and is excited"], "sample_ids": ["sYITalLZjj4", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["water, rushes, background, birds", "noise, excited, infant"], "captions_pred_video": ["two ducks are swimming in the water near each other", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["wind blows and birds chirp", "a baby cries and a woman speaks"], "question": "which entity is quieter", "label": 0}, {"captions": ["water splashes as an animal walks through", "a car accelerates and wind blows"], "sample_ids": ["w1ir-sZ3Im8", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["animal, water, splashes", "accelerates, wind, blows"], "captions_pred_video": ["footage of a group of people riding horses through a river", null], "captions_pred_audio": ["water splashes and gurgles as people speak", "a 
race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "people speak as gunfire rings out"], "sample_ids": ["zCrAfDfv6-A", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["person, mouse, click", "gunfire, ring, speak"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a person whistles a song", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["shmR4OZtzqA", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["man, engine, idle", "clickety-clack, train, whistle"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man speaks while a motor runs", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "people cheer as a vehicle 
engine revs"], "sample_ids": ["xvDdE3zNf8Y", "xjhAnI2q6hM"], "start_seconds": ["120", "6"], "properties": ["A, crumple, paper", "engine revs, vehicle, people"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman speaks and crumples paper", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a man speaks as a motor runs in the background"], "sample_ids": ["su6FAOcOA8c", "xZepNM9qcRA"], "start_seconds": ["4", "30"], "properties": ["engine, run, woman", "background, motor, run"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["xKB8O8LTs6s", "yDoT73BWsdA"], "start_seconds": ["70", "10"], "properties": ["music, gunfire, explosion", "engine, revs, vehicle"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a race car accelerates and revs its engine "], "question": "which entity is more quiet", "label": 1}, {"captions": ["children speak and play together", "a toilet flushes and water drains"], "sample_ids": ["yVVP8XvWJTo", "sfAvvZwdLCY"], "start_seconds": ["260", "20"], "properties": ["children, speak, play", "water drains, flushes, water"], "captions_pred_video": 
["footage of a playground at a school or daycare center", "footage of the toilet in the bathroom"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vKrYfzleLB8", "vb1fPSDI4c"], "start_seconds": ["110", "30"], "properties": ["a, ring, gunshots", "multiple, people, yell"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", null], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking?", "label": 1}, {"captions": ["someone whistles a song", "winds blows roughly as a vehicle races past"], "sample_ids": ["sIXTftIuUgw", "xjvTpk2Zpr8"], "start_seconds": ["90", "70"], "properties": ["someone, song, whistle", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a person whistling a song", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["uoGVs9yUqY4", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["multiple, vocalize, wind", "engine, idle, woman"], "captions_pred_video": ["for how to make a wooden shed door youtube", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a man 
speaks over a radio as wind blows and water splashes"], "sample_ids": ["vXlk0lIQBFo", "tDVADusiIoc"], "start_seconds": ["470", "60"], "properties": ["wind, speak, vocalize", "water, radio, man"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a piece of wood is being placed down and sawed"], "sample_ids": ["uRExseg-0XI", "uiItxDsDMFI"], "start_seconds": ["210", "30"], "properties": ["woman, man, water", "wood, piece, saw"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "a man cutting a log with an axe in the woods"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a saw is being used with background noise "], "question": "which entity is about a piece of wood being sawed?", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "pigeons vocalize and birds chirp"], "sample_ids": ["sShpyu2l4YQ", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["growl, bark, yip", "vocalize, bird, chirp"], "captions_pred_video": ["the puppies are playing with a toy", "of the pigeon in the cage"], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["tDlfY3nmx1A", "zFjIWfSD-4"], "start_seconds": ["160", "410"], "properties": ["applause, laugh, man", "People, motor, brakes"], 
"captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", null], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a large bell chimes back and forth loudly", "pigeons vocalize and birds chirp"], "sample_ids": ["w2M4i1mklOA", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["loud, chime, bell", "vocalize, bird, chirp"], "captions_pred_video": ["footage of an antique clock", "of the pigeon in the cage"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["wind blows and people scream while an engine revs", "a stream of water flows as people talk and wind blows"], "sample_ids": ["w5W5Kqtc8E", "xBxDz0CFVn0"], "start_seconds": ["100", "30"], "properties": ["wind, engine, scream", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["a man speaks as horns blow", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["tHyNqRyK34A", "ukg5L09Wpvo"], "start_seconds": ["24", "150"], "properties": ["a, man, speaks", "clickety-clack, train, whistle"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a 
motor runs in the distance as a soft wind periodically gusts", "vehicles pass by on a roadway"], "sample_ids": ["xyL9F5VrjkE", "tgbONvsP47Y"], "start_seconds": ["20", "0"], "properties": ["wind, motor, distance", "pass, vehicle, roadway"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a fire truck entering a garage"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["y1saVTXsKwc", "tDVADusiIoc"], "start_seconds": ["80", "60"], "properties": ["a, dog, talk", "water, radio, man"], "captions_pred_video": ["a dog playing with a pink ball", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a dog barks and a man speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a woman talking?", "label": 0}, {"captions": ["a fly buzzes around loudly as birds chirp", "a train horn blows as it passes by"], "sample_ids": ["uJV8NDaHqqk", "zVacuqSb4LI"], "start_seconds": ["100", "30"], "properties": ["loud, fly, chirp", "horn, blows, train"], "captions_pred_video": ["a bee hive in a wooden box", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a swarm of bees buzzing around", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["a young woman speaks over spraying and another person yells", "paper is crumpling consistently"], "sample_ids": ["uYT5gxnyMWM", "v5cSxLaHADY"], "start_seconds": ["50", "0"], "properties": ["person, spray, yell", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "paper is crumpled and crinkled"], "question": "which entity is a video of a person speaking over spraying and another person yelling?", "label": 0}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a telephone rings followed by a woman talking"], "sample_ids": ["weDbePuc-Xc", "tGcFnX0GHI"], "start_seconds": ["40", "0"], "properties": ["cartoon character, music, vocalize", "ring, talk, woman"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", null], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a person snores loudly multiple times at a close distance"], "sample_ids": ["y2bVZ7rz-5M", "sSMl2vc3ek"], "start_seconds": ["280", "20"], "properties": ["motor noise, horn, siren", "loud, multiple, distance"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring 
", "a person snoring loudly"], "question": "which entity is louder", "label": 0}, {"captions": ["an adult male speaks and dials a rotary phone", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tK4VlLsNxak", "tdWhHV3X25Q"], "start_seconds": ["120", "60"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "applause, audience, yells"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["uiS58TNyUiw", "wDVMhEdTiVw"], "start_seconds": ["430", "30"], "properties": ["audio, man, speaking", "gun, shoot, water"], "captions_pred_video": ["of the pigeon in the cage", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "birds chirp and objects are moved around"], "sample_ids": ["vKrYfzleLB8", "yPUYU6t3rwo"], "start_seconds": ["110", "370"], "properties": ["a, ring, gunshots", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "wind 
blowing followed by a zoom"], "sample_ids": ["uPDn2BFTHk", "vr8ZXjEBhMQ"], "start_seconds": ["140", "150"], "properties": ["woman, laughs, speaks", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a baby laughs and a woman speaks", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to be a video of a child and a woman laughing?", "label": 0}, {"captions": ["people speak and laugh as a child speaks", "a car accelerates and wind blows"], "sample_ids": ["sa6TLVbooCc", "u0TrcHhkPQ"], "start_seconds": ["240", "20"], "properties": ["people, laugh, child", "accelerates, wind, blows"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", null], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["someone whistles a song", "a train engine runs and a horn blows"], "sample_ids": ["sIXTftIuUgw", "zPX9o1uDiI"], "start_seconds": ["90", "40"], "properties": ["someone, song, whistle", "engine, horn, run"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person whistling a song", "a train moves with its horn blowing and wheels squealing "], "question": "which entity is a train", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["uPDn2BFTHk", "xjhAnI2q6hM"], "start_seconds": ["140", "6"], "properties": ["woman, laughs, speaks", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a woman speaks 
as she rubs two objects together"], "sample_ids": ["yajyRTUQk3U", "vzxHnu-SFEw"], "start_seconds": ["400", "80"], "properties": ["noise, woman, speak", "two objects, woman, speak"], "captions_pred_video": ["- a woman cooking in the kitchen", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a woman is speaking and breathing with mechanisms in the background "], "question": "which woman speaks over sizzling noise", "label": 0}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "some tunes played by whistling"], "sample_ids": ["slZLHwNbbt4", "u6BnG6YZqJ4"], "start_seconds": ["300", "0"], "properties": ["clap, distance, horn", "tune, play, whistling"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["birds chirp 
and an insect buzzes around", "water flows as men speak and yell"], "sample_ids": ["t97k0cejSQE", "vJ7JPEFhyLA"], "start_seconds": ["250", "16"], "properties": ["bird, chirp, insect", "water, flow, men"], "captions_pred_video": ["a bee on a purple thistle flower", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "birds chirp and an insect buzzes around"], "sample_ids": ["sYITalLZjj4", "t97k0cejSQE"], "start_seconds": ["30", "250"], "properties": ["water, rushes, background, birds", "bird, chirp, insect"], "captions_pred_video": ["two ducks are swimming in the water near each other", "a bee on a purple thistle flower"], "captions_pred_audio": ["wind blows and birds chirp", "a bee buzzes and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "an infant crying as a woman laughs"], "sample_ids": ["w2JXXIAdUdg", "xhmRY9yhC7c"], "start_seconds": ["10", "20"], "properties": ["emits, sleeping, person", "a, laugh, infant"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a person is snoring while sleeping", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vJrjSeP17yE", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["a person is sleeping, snoring, person", "three men, wind, flow"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person 
snoring loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a person", "label": 0}, {"captions": ["people applaud and hoot and chat quietly", "an engine runs loudly"], "sample_ids": ["wwyfGO2J4", "vqZuVbG6-HI"], "start_seconds": ["90", "130"], "properties": ["people, applaud, hoot", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "some men converse over an engine running"], "sample_ids": ["wnpJndXuxLc", "sCiy7QS1U"], "start_seconds": ["50", "300"], "properties": ["beeps, loud, whistle", "men, converse, engine"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video of a train?", "label": 0}, {"captions": ["an adult woman and an adult man speak", "a man speaks, then dials a rotary telephone"], "sample_ids": ["zTLVJCo4WEE", "tK4VlLsNxak"], "start_seconds": ["30", "120"], "properties": ["two people, adult, speak", "a, dial, telephone"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "person is wearing a headset and holding a remote control in his hand"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking and using a sewing machine"], "question": "which entity shows a man speaking?", "label": 1}, {"captions": ["a weapon fires multiple times", "a duck quacks continuously"], "sample_ids": ["sMC07Ucy7kg", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["weapon, fire, multiple", "quacks, continuously, 
duck"], "captions_pred_video": ["footage is from a car's point of view", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wjsXBsc7M40", "tDVADusiIoc"], "start_seconds": ["10", "60"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "water, radio, man"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["y2bVZ7rz-5M", "uEU-Hg5MTN8"], "start_seconds": ["280", "27"], "properties": ["engine, horn, siren", "animal, grunts, snorts"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a woman is speaking and a baby is crying"], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a frog vocalizes while birds chirp", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vMf1dLD6Sng", "vJ7JPEFhyLA"], "start_seconds": ["6", "16"], "properties": ["frog, bird, vocalize", "three men, wind, flow"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a frog croaks loudly", "a man is speaking while the 
wind is blowing and water is splashing"], "question": "which entity is not a frog?", "label": 1}, {"captions": ["people speak and tapping occurs", "some tunes played by whistling"], "sample_ids": ["tFCUUGdREgA", "u6BnG6YZqJ4"], "start_seconds": ["70", "0"], "properties": ["people, tap, speak", "tune, play, whistling"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "an airplane flies overhead as a woman speaks"], "sample_ids": ["su6FAOcOA8c", "zj2R0XoFr5k"], "start_seconds": ["4", "50"], "properties": ["engine, run, woman", "airplane, fly, overhead"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying overhead", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a person screams glaringly"], "sample_ids": ["zFjIWfSD-4", "xC8kbrKJmco"], "start_seconds": ["410", "0"], "properties": ["People, motor, brakes", "glaringly, screams, person"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a goat is bleating "], "question": "which entity is more agressive", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yPUYU6t3rwo", "tDVADusiIoc"], "start_seconds": ["370", "60"], "properties": ["birds chirp, objects are moved around, 
birds", "water, radio, man"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["insects buzz and a man speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a helicopter engine runs continuously"], "sample_ids": ["tOSWIURC-4", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["engine, work, nearby", "engine, running, continuously"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a lawn mower is running ", "a helicopter is flying overhead "], "question": "which entity has a running engine", "label": 1}, {"captions": ["multiple ducks quack continuously", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["wfHeoPDLMaM", "s7knHCFW82w"], "start_seconds": ["30", "30"], "properties": ["multiple, quack, continuously", "blow horn, get close, train"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a 
fire", "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["ducks are quacking", "a train is blowing its horn and its wheels are squealing "], "question": "which entity is a train", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zF8yoL0rkbI", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["engine, run, someone", "rustling, ducks, quack"], "captions_pred_video": ["footage of the traffic on the street at night", null], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a duck quacks and a woman speaks"], "question": "which entity is about a vehicle engine running?", "label": 0}, {"captions": ["a man talks followed by a woman shouting", "a car accelerates and wind blows"], "sample_ids": ["s3cTDAj31g", "u0TrcHhkPQ"], "start_seconds": ["80", "20"], "properties": ["man, talk, woman", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a baby is crying", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sa6TLVbooCc", "xKB8O8LTs6s"], "start_seconds": ["240", "70"], "properties": ["people, laugh, child", "music, gunfire, explosion"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["an airplane accelerates briefly", "a vehicle engine revs 
as the vehicle passes"], "sample_ids": ["zjTG0gaGCUI", "yDoT73BWsdA"], "start_seconds": ["80", "10"], "properties": ["accelerates, airplane, briefly", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a race car accelerates and revs its engine "], "question": "which is a vehicle", "label": 1}, {"captions": ["a toilet flushes and water drains", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sfAvvZwdLCY", "vfYTJq7nU"], "start_seconds": ["20", "130"], "properties": ["water drains, flushes, water", "rustling, ducks, quack"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a duck quacks and a woman speaks"], "question": "which entity is about water?", "label": 0}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["y2ZBGpgbhHM", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["birds, tweet, pant", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about animals?", "label": 0}, {"captions": ["a rumble grows louder", "people applaud and hoot and chat quietly"], "sample_ids": ["y4MY9mp8-TA", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["loudness, increase, rumble", "people, applaud, hoot"], "captions_pred_video": ["a helicopter flying in the sky", null], "captions_pred_audio": ["a helicopter flies overhead ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, 
{"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a stream of water runs briefly"], "sample_ids": ["weDbePuc-Xc", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["cartoon character, music, vocalize", "stream, water, run"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "an engine runs loudly"], "sample_ids": ["xO-Q2BlIIPU", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["two men, exclamation, speak", "loud, engine, run"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "a power tool runs and touches a surface"], "sample_ids": ["s3cTDAj31g", "zfvPRf3chY"], "start_seconds": ["80", "290"], "properties": ["man, talk, woman", "power tool, run, touch"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a baby is crying", "a man is speaking while a power tool is being used "], "question": "which entity is a machine", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a man speaks as a machine runs"], "sample_ids": ["tDlysoZiA1I", "vD6lYD1l0BY"], "start_seconds": ["0", "330"], "properties": ["animal, grunts, chirps", "a, machine, run"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "game 
controller being held in the hands of the person"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking and dishes are being washed "], "question": "which entity is a man speaking to a machine?", "label": 1}, {"captions": ["birds vocalize and a man speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["v0wPrLBI3hg", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["vocalize, bird, speak", "three men, wind, flow"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["paper is crumpling consistently", "water is sprayed across a hard surface"], "sample_ids": ["v5cSxLaHADY", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "water, spray, surface"], "captions_pred_video": ["footage of the person holding a pair of scissors", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["paper is crumpled and crinkled", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a horn honks and then loudly blares", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wnpJndXuxLc", "su6FAOcOA8c"], "start_seconds": ["50", "4"], "properties": ["horn, honk, loud", "engine, idle, woman"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity is quieter", "label": 1}, {"captions": 
["ticking continues without interruption", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["v-g-j2uTByM", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["ticking, continuous, clock", "loud, laughter, intermittent"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a clock is ticking loudly", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is continuous", "label": 0}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "a man speaks followed by another man speaking outside"], "sample_ids": ["wRBHTgrbiwg", "viuTg1M-dqg"], "start_seconds": ["50", "30"], "properties": ["birds, chirp, cooing", "two men, speak, follow"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a human speaking?", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a car accelerates and wind blows"], "sample_ids": ["siJFXfGWgDk", "u0TrcHhkPQ"], "start_seconds": ["50", "20"], "properties": ["man, woman, vehicle", "accelerates, wind, blows"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", null], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a race car accelerates and revs its engine "], "question": "which entity is about a car?", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "a whistling owl calls out repeatedly and insects screech"], "sample_ids": ["uZesmtKZGSw", "w6RTHR6AeAg"], "start_seconds": ["250", "40"], "properties": ["car, track, man", "call, owl, screech"], 
"captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", null], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "an owl hoots and mechanisms operate "], "question": "which entity is a bird?", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["tIY7qOV3rEM", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "a, scream, girl"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["birds tweet and squawk", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["w1mlz3Pe4fU", "yajyRTUQk3U"], "start_seconds": ["300", "400"], "properties": ["squawk, tweet, scream", "a woman, something, fried"], "captions_pred_video": ["of a bird in a cage", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds are chirping and singing", "a woman is speaking while food is frying in the background"], "question": "which entity is a video", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "water pouring and bubbling"], "sample_ids": ["vMf1dLD6Sng", 
"uyRfq-jKPpo"], "start_seconds": ["6", "50"], "properties": ["frog, bird, vocalize", "water, bubbles, pouring"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a frog croaks loudly", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vXlk0lIQBFo", "su6FAOcOA8c"], "start_seconds": ["470", "4"], "properties": ["wind, talk, vocalize", "engine, idle, woman"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["frogs croak and vocalize", "an engine runs loudly"], "sample_ids": ["yswmmRZFItk", "vqZuVbG6-HI"], "start_seconds": ["0", "130"], "properties": ["croak, vocalize, frog", "loud, engine, run"], "captions_pred_video": ["a close up of a frog in the water", "footage is blurry because it's raining outside"], 
"captions_pred_audio": ["a frog is croaking", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "a clock ticktocks briefly"], "sample_ids": ["tiDFTC-5vU", "u7C-AEBQM"], "start_seconds": ["30", "30"], "properties": ["male, duck, laugh", "ticktocks, clock, ticktocks briefly"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a ticktock of a clock"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "vehicles pass by on a roadway"], "sample_ids": ["xzKKf9bKNUo", "tgbONvsP47Y"], "start_seconds": ["10", "0"], "properties": ["background, noise, snoring", "pass, vehicle, roadway"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a person snoring loudly", "a car is driving on the road "], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["a drill runs and two people laugh", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["tEE3MpBt1sg", "wDVMhEdTiVw"], "start_seconds": ["50", "30"], "properties": ["two people, laugh, drill", "gun, shoot, water"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a person whistles a meandering tune", "a woman speaks as frying food sizzles"], "sample_ids": ["uFoga8sHpiw", "wTideSjRFS0"], "start_seconds": ["90", "30"], "properties": ["person, tune, whistle", "food, sizzle, woman"], "captions_pred_video": 
["footage of a bird in a cage", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a person whistles a song", "a woman is speaking while water is running in the background"], "question": "which entity is a person", "label": 0}, {"captions": ["a vehicle accelerates and squeals tires", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["yRx9txMcBl0", "xKB8O8LTs6s"], "start_seconds": ["40", "70"], "properties": ["accelerates, tires, squeals", "music, gunfire, explosion"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a car is revving its engine and skidding ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vYkA3cfXp5Q", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["speed, idle, accelerate", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["an engine is idling", "a woman is speaking and a dog is whimpering"], "question": "which entity is a still image?", "label": 0}, {"captions": 
["three men talk while wind blows and some liquid flows", "a woman speaks as she rubs two objects together"], "sample_ids": ["vJ7JPEFhyLA", "vzxHnu-SFEw"], "start_seconds": ["16", "80"], "properties": ["three men, wind, flow", "two objects, woman, speak"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity shows a woman speaking?", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "paper is crumpling consistently"], "sample_ids": ["vbpKkWvfOu4", "v5cSxLaHADY"], "start_seconds": ["560", "0"], "properties": ["a, man, speaks", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of 
the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "paper is crumpled and crinkled"], "question": "which entity is a video of a woman speaking and then a man speaking?", "label": 0}, {"captions": ["the wind blows while a vehicle engine runs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xyL9F5VrjkE", "tDVADusiIoc"], "start_seconds": ["20", "60"], "properties": ["wind, blows, vehicle", "water, radio, man"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a vehicle running?", "label": 0}, {"captions": ["two men speak as a buffeting wind blows", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["y8WEcpOlT3I", "xfaoyyzw2WU"], "start_seconds": ["40", "180"], "properties": ["wind, speak, buffeting", "loud, jet engine, roar"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tDlysoZiA1I", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["animal, grunt, chirp", "engine, laugh, loud"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "people cheer as a 
vehicle engine revs"], "sample_ids": ["tDlysoZiA1I", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["animal, grunt, multiple", "engine revs, vehicle, people"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["water gurgles, metal squeaks and the water stops", "three men talk while wind blows and some liquid flows"], "sample_ids": ["x4a9YGIw4ok", "vJ7JPEFhyLA"], "start_seconds": ["120", "16"], "properties": ["water, gurgles, stops", "three men, wind, flow"], "captions_pred_video": ["footage is blurry and out of focus", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a toilet flushes and water splashes", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about liquid flowing?", "label": 1}, {"captions": ["an audience gives applause", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["x6iCUDmRpKQ", "uYT5gxnyMWM"], "start_seconds": ["38", "50"], "properties": ["applause, audience, give", "female, spraying, scream"], "captions_pred_video": ["a black background with the moon and stars in the sky", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a group of people are clapping and cheering", "a woman is speaking and a baby is crying"], "question": "which entity is a performance", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a small engine idles continuously"], "sample_ids": ["zgUgkpk78xU", "y5WII6cTH7k"], "start_seconds": ["70", "40"], "properties": ["clinking, humming, horn", "engine, idle, continuously"], "captions_pred_video": ["of a train 
passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a sewing machine stitching a red and white hat"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "an engine is knocking and vibrating "], "question": "which entity is a train", "label": 0}, {"captions": ["running water in a faucet with some clinks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zNRChLjqcU", "tdWhHV3X25Q"], "start_seconds": ["220", "60"], "properties": ["water, faucet, run", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "some men converse over an engine running"], "sample_ids": ["u6jIvCtKarQ", "sCiy7QS1U"], "start_seconds": ["70", "300"], "properties": ["a, man, speaks", "men, converse, engine"], "captions_pred_video": ["footage of a person using a blender on a stove top", null], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has more people", "label": 1}, {"captions": ["wind blows strongly", "a motorcycle idles loudly as wind blows"], "sample_ids": ["w8uLijTqtlU", "v7jJS8aAyA"], "start_seconds": ["70", "10"], "properties": ["wind, blows, strongly", "wind, blows, loudly"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a motorcycle engine is idling and 
vibrating"], "question": "which entity is louder", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "a clock ticktocks"], "sample_ids": ["xERFUeZONz8", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["ring, approach, traffic", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["an emergency vehicle siren blares", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vf9xf3vMsGM", "wz7N8YRy74I"], "start_seconds": ["540", "30"], "properties": ["A man speaks while turning a water faucet on.", "rooster, crow, background, men"], "captions_pred_video": ["of the person washing their hands under the faucet", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in it?", "label": 1}, {"captions": ["water rushes by", "water flows as men speak and yell"], "sample_ids": ["x-PeY8Yb8M4", "vJ7JPEFhyLA"], "start_seconds": ["300", "16"], "properties": ["water, rushes, by", "water, flow, men"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["an insect buzzes around continuously", "a man speaks followed by another man speaking outside"], "sample_ids": ["v25l1jef3JY", "viuTg1M-dqg"], "start_seconds": ["0", "30"], "properties": ["buzzes, 
continuously, insect", "two men, speak, follow"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a single speaker?", "label": 0}, {"captions": ["a kid speaks followed by music playing", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tQWGZLItBXk", "tiDFTC-5vU"], "start_seconds": ["170", "30"], "properties": ["music, kid, speak", "male, duck, laugh"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "a telephone rings followed by a woman talking"], "sample_ids": ["sapQIQUhFc", "tGcFnX0GHI"], "start_seconds": ["280", "0"], "properties": ["liquid, flow, distance", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["wTjoRj1se3U", "yDoT73BWsdA"], "start_seconds": ["390", "10"], "properties": ["engine, run, people", "engine, revs, vehicle"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a jet engine is running and people are talking", "a race car accelerates and revs its engine "], "question": "which entity is a 
vehicle?", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "an infant crying as a woman laughs"], "sample_ids": ["uYT5gxnyMWM", "xhmRY9yhC7c"], "start_seconds": ["50", "20"], "properties": ["female, spraying, scream", "a, laugh, infant"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a person is snoring while sleeping", "people cheer as a vehicle engine revs"], "sample_ids": ["vJrjSeP17yE", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["a person is sleeping, snoring, person", "engine revs, vehicle, people"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a person snoring loudly", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["an airplane engine runs", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yVPZ2MNWpms", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["engine, airplane, runs", "People, motor, brakes"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", null], "captions_pred_audio": ["a car is driving by on the road ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["se87d6yxEOA", "tiDFTC-5vU"], "start_seconds": ["10", "30"], "properties": ["run, whistle, pass", "male, duck, laugh"], "captions_pred_video": ["footage of a train passing by a train station 
with smoke billowing out of the train's smokestack", null], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["an insect buzzes around continuously", "a person sniffs and sneezes"], "sample_ids": ["v25l1jef3JY", "uRlbY6aoBU"], "start_seconds": ["0", "0"], "properties": ["buzzes, continuously, insect", "sneezes, person, sniffs"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is sneezing "], "question": "which entity is not a person?", "label": 0}, {"captions": ["a crowd yells, reacts and applauds", "waves crash against a shoreline and people speak"], "sample_ids": ["wztCSUxOf8", "yFB25fqfU8I"], "start_seconds": ["130", "300"], "properties": ["a crowd, yells, applauds", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which is a natural phenomenon", "label": 1}, {"captions": ["a stream runs then someone speaks", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["wbHTKEJZyhc", "tiDFTC-5vU"], "start_seconds": ["20", "30"], "properties": ["stream, run, someone", "male, duck, laugh"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with 
trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", null], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking and ducks are quacking"], "question": "which entity is a video of a duck quacking?", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a woman speaks as she rubs two objects together"], "sample_ids": ["vZAw4apG0Es", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["background, tick, repeat", "two objects, woman, speak"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a clock is ticking and people are talking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a woman speaks and then a man speaks", "a man speaks as a car is passing by"], "sample_ids": ["vbpKkWvfOu4", "sK4u5T8hW78"], "start_seconds": ["560", "30"], "properties": ["a, man, speaks", "a, car, pass"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["heavy rain splashes as it falls", "multiple birds chirp and an animal grunts"], "sample_ids": ["wP8ZKrlx3oA", "tDlysoZiA1I"], "start_seconds": ["40", "0"], "properties": ["fall, rain, splash", "animal, grunt, multiple"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a heavy rain is falling on a surface", "birds are chirping and a rooster is crowing "], "question": "which entity is more likely to be heard", "label": 1}, 
{"captions": ["speaking following by laughing and clapping", "birds chirp and objects are moved around"], "sample_ids": ["u2f5NpsoHBg", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["person, laugh, clap", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "insects buzz and a man speaks"], "question": "which entity is a bird", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "water flows as men speak and yell"], "sample_ids": ["u6jIvCtKarQ", "vJ7JPEFhyLA"], "start_seconds": ["70", "16"], "properties": ["a, man, speaks", "water, flow, men"], "captions_pred_video": ["footage of a person using a blender on a stove top", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more water flowing", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zgUgkpk78xU", "xKB8O8LTs6s"], "start_seconds": ["70", "70"], "properties": ["clinking, humming, horn", "music, gunfire, explosion"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "music plays while a 
woman speaks and gunshots are fired "], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["a toilet door squeaks as it is opened", "repeated tapping is accompanied by water running and a woman speaking softly"], "sample_ids": ["sdXV-ylviw", "wvKpEYswXO0"], "start_seconds": ["190", "150"], "properties": ["door, toilet, squeaks", "sound, water, running"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is accompanied by water running", "label": 1}, {"captions": ["male speech with light ticking", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["xO-Q2BlIIPU", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["male, speech, ticking", "two men, woman, birds"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a duck quacks several times", "water rushes and then a vehicle zooms past"], "sample_ids": ["vh30P49Po6s", "s4Uz1Ffgo04"], "start_seconds": ["30", "100"], "properties": ["quacks, duck, several", "water, rushes, vehicle"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sTpirNYo8vQ", 
"vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["a, tone, fast", "three men, wind, flow"], "captions_pred_video": ["of a man taking a selfie on a bus", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a more natural setting", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "water splashes as an animal walks through"], "sample_ids": ["tGcFnX0GHI", "w1ir-sZ3Im8"], "start_seconds": ["0", "90"], "properties": ["ring, talk, woman", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a consistent ticking pattern", "a car accelerates and wind blows"], "sample_ids": ["sCeWURVHfOM", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["ticking, pattern, clock", "accelerates, wind, blows"], "captions_pred_video": ["- a close-up view of the clock's inner workings", null], "captions_pred_audio": ["ticking of a clock", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "a child speaks in closed space"], "sample_ids": ["yJ0TePmaOo", "yW6FWLSLkx4"], "start_seconds": ["390", "40"], "properties": ["two hard objects, man, speak", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, 
{"captions": ["a airplane flies overhead as a woman speaks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zj2R0XoFr5k", "wqZ135Ssz0"], "start_seconds": ["50", "60"], "properties": ["airplane, fly, woman", "two men, woman, birds"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", null], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more birds", "label": 1}, {"captions": ["scraping and female speech with distant music", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yHeVV-xeOxQ", "tDVADusiIoc"], "start_seconds": ["130", "60"], "properties": ["female, speech, music", "water, radio, man"], "captions_pred_video": ["of a girl milking a goat's udder", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a clock ticktocks briefly", "small dogs yip and bark sharply"], "sample_ids": ["u7C-AEBQM", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["ticktocks, clock, ticktocks briefly", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a ticktock of a clock", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "a man talks while a clock does ticktock"], "sample_ids": ["xvDdE3zNf8Y", "spYNpeN7rPY"], "start_seconds": ["120", "1"], "properties": ["a, female, speaks", "a clock, ticktock, man"], "captions_pred_video": ["of a woman in a white shirt and glasses 
holding a purple tie", "in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking and breathing with background noise "], "question": "which entity is a clock?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a man speaks as a motor runs in the background"], "sample_ids": ["sfAvvZwdLCY", "xZepNM9qcRA"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "background, motor, run"], "captions_pred_video": ["footage of the toilet in the bathroom", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a toilet is flushed", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a car speeding up in the distance"], "sample_ids": ["sG7TyPnFDR0", "u0TrcHhkPQ"], "start_seconds": ["180", "20"], "properties": 
["beeps, machine, smoke alarm", "distance, car, speed"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", null], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "winds blows roughly as a vehicle races past"], "sample_ids": ["w2M4i1mklOA", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["loud, chime, bell", "wind, blows, vehicle"], "captions_pred_video": ["footage of an antique clock", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a woman speaks and other women and a man talk with her", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vbpKkWvfOu4", "su6FAOcOA8c"], "start_seconds": ["560", "4"], "properties": ["a, woman, man", "engine, idle, woman"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a woman is speaking and a subway train is moving "], "question": "which woman is speaking", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "someone is typing on a computer keyboard"], "sample_ids": ["tEE3MpBt1sg", "v0x1odnXtP0"], "start_seconds": ["50", "210"], "properties": ["drill, something, laugh", "keyboard, type, computer"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "how to make money on 
youtube in spanish"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a person is typing on a keyboard"], "question": "which is not a drill", "label": 1}, {"captions": ["motors runs briefly and tires screech", "vehicles pass by on a roadway"], "sample_ids": ["yRx9txMcBl0", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["motors, tires, screech", "pass, vehicle, roadway"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "a vehicle accelerates and squeals tires"], "sample_ids": ["sQGXqGcwOTc", "yRx9txMcBl0"], "start_seconds": ["3", "40"], "properties": ["audio, kid, giggles", "accelerates, tires, squeals"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 
mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a car is revving its engine and skidding "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "some men converse over an engine running"], "sample_ids": ["zhx6hoYrHeI", "sCiy7QS1U"], "start_seconds": ["160", "300"], "properties": ["engine, sputter, rough", "men, converse, engine"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a person is snoring while sleeping", "water flows as men speak and yell"], "sample_ids": ["vJrjSeP17yE", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["a person is sleeping, snoring, person", "water, flow, men"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a person speaking and yelling?", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["yYJksgsxx5U", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["audio, woman, silverware", "a woman, something, fried"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a woman is speaking while food is frying in the 
background"], "question": "which woman is speaking over chopping and silverware noises", "label": 0}, {"captions": ["a man speaks in the background while a slow tick repeats", "a man speaks as a car is passing by"], "sample_ids": ["vZAw4apG0Es", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["background, tick, repeat", "a, car, pass"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a moving object", "label": 1}, {"captions": ["a child speaks", "an infant crying as a woman laughs"], "sample_ids": ["yW6FWLSLkx4", "xhmRY9yhC7c"], "start_seconds": ["40", "20"], "properties": ["a, child, speaks", "a, laugh, infant"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "birds chirp and objects are moved around"], "sample_ids": ["w1mlz3Pe4fU", "yPUYU6t3rwo"], "start_seconds": ["300", "370"], "properties": ["vocalize, chirp, continuously", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a bird in a cage", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": 
["birds are chirping and singing", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["continuous snoring", "a person sniffles and then sneezes in the distance"], "sample_ids": ["sLkeqCDJIyw", "uRlbY6aoBU"], "start_seconds": ["120", "0"], "properties": ["loud, snoring, noise", "a, distance, sneeze"], "captions_pred_video": [", what is the man doing on the couch? sleeping", null], "captions_pred_audio": ["a person is snoring loudly", "a man is sneezing "], "question": "which entity is not loud", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "a duck quacks continuously"], "sample_ids": ["s3cTDAj31g", "vh30P49Po6s"], "start_seconds": ["80", "30"], "properties": ["man, talk, woman", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "paper is crumpling consistently"], "sample_ids": ["zY3icUyMdh8", "v5cSxLaHADY"], "start_seconds": ["20", "0"], "properties": ["dog, bark, engine", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a motorcycle engine is idling", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["vZAqdHZ81yA", "wSVhSdj0F0"], "start_seconds": ["180", "10"], "properties": ["engine, motorcycle, idling", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end 
facing the viewer", null], "captions_pred_audio": ["an engine is idling loudly", "a car horn honks and keys jangle with background noise "], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "an insect buzzes around continuously"], "sample_ids": ["zl9Dqx-j7q4", "v25l1jef3JY"], "start_seconds": ["6", "0"], "properties": ["engine, laugh, loud", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a man driving a car in the dark", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a jet engine roars ", "a fly is buzzing around a microphone "], "question": "which entity is not a human", "label": 1}, {"captions": ["a small engine spits as it runs", "water flows as men speak and yell"], "sample_ids": ["sZvwOuuPGP0", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["spits, engine, runs", "water, flow, men"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a medium engine is running ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking and yelling?", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "an engine runs loudly"], "sample_ids": ["sZPuqDgX2V0", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["commentator, race, track", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a lawn mower is running and men are speaking "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a telephone rings followed by a woman talking", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tGcFnX0GHI", 
"vYkA3cfXp5Q"], "start_seconds": ["0", "30"], "properties": ["ring, talk, woman", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "an adult woman and an adult man speak"], "sample_ids": ["xjhAnI2q6hM", "zTLVJCo4WEE"], "start_seconds": ["6", "30"], "properties": ["wind, blow, loudly", "two people, adult, speak"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "- a boy with a rifle aiming at a target"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a woman speaks and crickets chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone is burping continuously", "a drill runs and two people laugh"], "sample_ids": ["y636gklDioE", "tEE3MpBt1sg"], "start_seconds": ["20", "50"], "properties": ["burps, burps, burps", "two people, laugh, drill"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a person burps loudly several times", "people are laughing breathing and speaking with background noise "], "question": "which entity is more likely to be a joke", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wqUmIEzuNz4", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["frog, bird, vocalize", "stream, water, flow"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "footage is blurry and out of focus"], "captions_pred_audio": ["a cat meows and rustles", "a man is speaking with wind noise in the background "], "question": 
"which entity is moving", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a child speaks in closed space"], "sample_ids": ["tDVADusiIoc", "yW6FWLSLkx4"], "start_seconds": ["60", "40"], "properties": ["water, radio, man", "child, space, speak"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["multiple ducks quack continuously", "a dog barks and whimpers"], "sample_ids": ["wfHeoPDLMaM", "sShpyu2l4YQ"], "start_seconds": ["30", "0"], "properties": ["multiple, quack, continuously", "barks, whimpers, dog"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "the puppies are playing with a toy"], "captions_pred_audio": ["ducks are quacking", "a dog is barking and growling"], "question": "which entity is a dog", "label": 1}, {"captions": ["people speak in the background as a clock 
ticktocks", "an airplane engine spools and people speak"], "sample_ids": ["vZAw4apG0Es", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["background, clock, ticktocks", "airplane, engine, spool"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a clock is ticking and people are talking", "a jet engine is running and people are talking"], "question": "which entity is a video of a clock ticking?", "label": 0}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "a man speaks as birds chirp and a vehicle passes nearby"], "sample_ids": ["sofxkNWaP0s", "siJFXfGWgDk"], "start_seconds": ["30", "50"], "properties": ["wind, engine, louder", "a, bird, vehicle"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a man is speaking and birds are chirping in the background "], "question": "which entity is about a vehicle passing nearby?", "label": 1}, {"captions": ["a cat meows and children speak", "pigeons vocalize and birds chirp"], "sample_ids": ["x5cuQjOdM3E", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["cat, speak, children", "vocalize, bird, chirp"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of the pigeon in the cage"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a propeller moves loudly nearby", "a person uses a saw to cut some wood"], "sample_ids": ["ugHJF0hfYkg", "sHbXC6na9hg"], "start_seconds": ["10", "0"], "properties": ["loud, propeller, move", "a person, saw, wood"], "captions_pred_video": ["a man in a 
helicopter cockpit wearing headphones", "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["a helicopter is flying overhead ", "an engine is idling and vibrating"], "question": "which entity is quieter", "label": 1}, {"captions": ["an animal quacks rapidly", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vh30P49Po6s", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["animal, quacks, rapidly", "music, gunfire, explosion"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a duck is quacking loudly", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be a movie", "label": 1}, {"captions": ["people speak and tapping occurs", "a vehicle is skidding and squealing tires"], "sample_ids": ["tFCUUGdREgA", "soTOh3zYJfY"], "start_seconds": ["70", "40"], "properties": ["people, tap, speak", "vehicle, skid, tires"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "a red car drifting on a winding road with smoke coming out of it"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["ul60S8TXDA8", "yajyRTUQk3U"], "start_seconds": ["60", "400"], "properties": ["sound, distance, bell", "a woman, something, fried"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a woman is speaking while food is frying in the background"], "question": "which 
entity is about cooking?", "label": 1}, {"captions": ["people clap and speak in the distance", "water pouring and bubbling"], "sample_ids": ["wwyfGO2J4", "uyRfq-jKPpo"], "start_seconds": ["90", "50"], "properties": ["clap, distance, speak", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sShpyu2l4YQ", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["growl, bark, yip", "stream, water, flow"], "captions_pred_video": ["the puppies are playing with a toy", "footage is blurry and out of focus"], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["bees buzz as wind blows", "frogs croak and vocalize"], "sample_ids": ["tMJne1a4AFI", "yswmmRZFItk"], "start_seconds": ["0", "0"], "properties": ["bees, buzz, wind", "croak, vocalize, frog"], "captions_pred_video": ["a swarm of bees on the ground", "a close up of a frog in 
the water"], "captions_pred_audio": ["a swarm of bees buzzing around", "a frog is croaking"], "question": "which animal is more likely to be a frog", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["sWZzXuWYY", "xjvTpk2Zpr8"], "start_seconds": ["420", "70"], "properties": ["male, clanks, thumps", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["siJFXfGWgDk", "su6FAOcOA8c"], "start_seconds": ["50", "4"], "properties": ["a, bird, vehicle", "engine, idle, woman"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tDlfY3nmx1A", "yajyRTUQk3U"], "start_seconds": ["160", "400"], "properties": ["applause, laugh, man", "a woman, something, fried"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a woman is speaking while food is frying in the background"], "question": "which entity is about food?", "label": 1}, {"captions": ["a duck quacks several times", "a man speaks as a car is passing by"], 
"sample_ids": ["vh30P49Po6s", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["quacks, duck, several", "a, car, pass"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is moving", "label": 1}, {"captions": ["a man is filing a hard object", "a telephone rings and a bird vocalizes"], "sample_ids": ["vveS8HT7Uog", "skd2PphS6oI"], "start_seconds": ["100", "190"], "properties": ["a man, hard, object", "ring, bird, vocalize"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a telephone bell rings repeatedly "], "question": "which entity is not a bird?", "label": 0}, {"captions": ["some liquid flows while a woman laughs and man talks", "vehicles pass by on a roadway"], "sample_ids": ["vddP56-ogds", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["liquid, laughs, man", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a car is driving on the road "], "question": "which entity shows vehicles moving", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", 
"a horn blasts loudly as a train passes"], "sample_ids": ["w2bYrCVLT60", "zsLxS-uLJTw"], "start_seconds": ["120", "20"], "properties": ["ducks, speak, quack", "horn, blast, train"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", "footage of the train on the tracks at sunrise or sunset"], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a train blows its horn and moves on the tracks "], "question": "which is louder", "label": 0}, {"captions": ["a motorcycle engine is idling", "people speak as gunfire rings out"], "sample_ids": ["vZAqdHZ81yA", "wqTCwqVRDlk"], "start_seconds": ["180", "80"], "properties": ["engine, motorcycle, idling", "gunfire, ring, speak"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["an engine is idling loudly", "a man is speaking and a gun is fired"], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "water is sprayed across a hard surface"], "sample_ids": ["zcDwZ6W7E3E", "sQwlkXjQabo"], "start_seconds": ["180", "10"], "properties": ["a, man, speak", "water, spray, surface"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "spraying followed by silence"], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["a person whistles a meandering tune", "a car accelerates and wind blows"], "sample_ids": ["uFoga8sHpiw", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["person, tune, whistle", "accelerates, wind, blows"], "captions_pred_video": ["footage of a bird in a cage", null], "captions_pred_audio": ["a person whistles a 
song", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman and man are speaking", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vbpKkWvfOu4", "vb1fPSDI4c"], "start_seconds": ["560", "30"], "properties": ["two people, speaking, woman, man", "multiple, people, yell"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["people speak then an engine runs", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["uMTTDZ2mb4", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["engine, run, people", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["an emergency siren wails as it passes", "a clock ticktocks"], "sample_ids": ["vGj1XLJvNrw", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["wails, wails, pass", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a police car driving down a city street", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["food is frying then a woman speaks", "water splashes and a door squeaks"], "sample_ids": ["ukxt9I7eMMg", 
"sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["food, woman, speak", "sound, splash, door"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a dog barks and taps with background noise "], "question": "which entity is silent", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["t97k0cejSQE", "tDlysoZiA1I"], "start_seconds": ["250", "0"], "properties": ["bird, chirp, insect", "animal, grunts, chirps"], "captions_pred_video": ["a bee on a purple thistle flower", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "birds are chirping and a rooster is crowing "], "question": "which entity has a bird chirp and an animal grunts?", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["u6jIvCtKarQ", "uZesmtKZGSw"], "start_seconds": ["70", "250"], "properties": ["a, man, speaks", "men, talk, cars"], "captions_pred_video": ["footage of a person using a blender on a stove top", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a man is speaking and a car is revving with laughter in the background "], 
"question": "which entity has more cars", "label": 1}, {"captions": ["a cat meows and children speak", "a clock ticktocks"], "sample_ids": ["x5cuQjOdM3E", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["cat, speak, children", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a black background with an airplane flying in the sky", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a cat meows and a woman speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sEprKHm8Sj8", "uYT5gxnyMWM"], "start_seconds": ["90", "50"], "properties": ["car, tires, slows", "female, spraying, scream"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaking with light rustling", "a man speaks over intermittent keyboard taps"], "sample_ids": ["zOZleIRqZm4", "tw76HGONaKg"], "start_seconds": ["80", "570"], "properties": ["light, rustling, man", "audio, man, keyboard"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of 
zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["a machine runs continuously", "a machine engine runs and a man speaks"], "sample_ids": ["wdXV3Pv0jiY", "vs65y4qmyBE"], "start_seconds": ["11", "340"], "properties": ["machine, running, continuously", "engine, run, man"], "captions_pred_video": ["footage is blurry and shaky", "a car is engulfed in flames on the side of the road"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a heavy engine is running and men are speaking "], "question": "which machine is running continuously", "label": 0}, {"captions": ["an electronic device bleeps once", "people applaud and hoot and chat quietly"], "sample_ids": ["tHJ6JSa8Y4", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["bleeps, electronic, device", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a clock is ticking and beeping", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "multiple people speak and children yell while water gurgles"], "sample_ids": ["xOZfdgAgJ9o", "vb1fPSDI4c"], "start_seconds": ["40", "30"], "properties": ["woman, whimpering, speaking", "multiple, people, 
yell"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["an airplane engine spools and people speak", "winds blows roughly as a vehicle races past"], "sample_ids": ["wTjoRj1se3U", "xjvTpk2Zpr8"], "start_seconds": ["390", "70"], "properties": ["airplane, engine, spool", "wind, blows, vehicle"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a jet engine is running and people are talking", "a jet engine roars and wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "an electric engine works nearby followed by a child talking"], "sample_ids": ["vK93VuO0yNc", "xSKJGCItUWE"], "start_seconds": ["30", "10"], "properties": ["male voice, bus, rumble", "engine, work, child"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a high pitched engine is running and a child speaks"], "question": "which entity is a machine", "label": 1}, {"captions": ["multiple ducks quack continuously", "a drill drills through something then people begin laughing"], "sample_ids": ["wfHeoPDLMaM", "tEE3MpBt1sg"], "start_seconds": ["30", "50"], "properties": ["multiple, quack, continuously", "drill, something, laugh"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a 
fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["ducks are quacking", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zj2R0XoFr5k", "wqZ135Ssz0"], "start_seconds": ["50", "60"], "properties": ["airplane, fly, overhead", "two men, woman, birds"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", null], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a bird?", "label": 1}, {"captions": ["an emergency siren wails as it passes", "a toilet flushes and a female speaks"], "sample_ids": ["vGj1XLJvNrw", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["wails, wails, pass", "female, flushes, toilet"], "captions_pred_video": ["footage of a police car driving down a city street", "footage is blurry and out of focus"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a toilet flushes and a man speaks"], "question": "which entity is silent", "label": 1}, {"captions": ["a toilet flushes and water drains", "a vehicle is 
skidding and squealing tires"], "sample_ids": ["sfAvvZwdLCY", "soTOh3zYJfY"], "start_seconds": ["20", "40"], "properties": ["water drains, flushes, water", "vehicle, skid, tires"], "captions_pred_video": ["footage of the toilet in the bathroom", "a red car drifting on a winding road with smoke coming out of it"], "captions_pred_audio": ["a toilet is flushed", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a motorcycle engine works nearby", "winds blows roughly as a vehicle races past"], "sample_ids": ["tOSWIURC-4", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["engine, work, nearby", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a lawn mower is running ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a man talks followed by a woman shouting", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["s3cTDAj31g", "tdWhHV3X25Q"], "start_seconds": ["80", "60"], "properties": ["man, talk, woman", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a man is speaking and a crowd is clapping"], "question": "which entity has more people", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "paper folding and crinkling"], "sample_ids": ["ugHJF0hfYkg", "zPpG3RD8lSs"], "start_seconds": ["10", "20"], "properties": ["loud, intense, propeller", "paper, fold, crinkle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's 
day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a helicopter is flying overhead ", "the wind blows and a mouse clicks "], "question": "which is quieter", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a clock ticktocks briefly"], "sample_ids": ["zFjIWfSD-4", "u7C-AEBQM"], "start_seconds": ["410", "30"], "properties": ["People, motor, brakes", "ticktocks, clock, ticktocks briefly"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a ticktock of a clock"], "question": "which entity is silent", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "a train horn blows as it passes by"], "sample_ids": ["wnpJndXuxLc", "zVacuqSb4LI"], "start_seconds": ["50", "30"], "properties": ["beeps, loud, whistle", "horn, blows, train"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by 
mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train?", "label": 1}, {"captions": ["a power tool runs and touches a surface", "birds chirp and objects are moved around"], "sample_ids": ["zfvPRf3chY", "yPUYU6t3rwo"], "start_seconds": ["290", "370"], "properties": ["power tool, run, touch", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "insects buzz and a man speaks"], "question": "which is not a power tool", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "paper is crumpling consistently"], "sample_ids": ["xSKJGCItUWE", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["engine, run, boy", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["some people speak", "birds chirp and objects are moved around"], "sample_ids": ["vbZ-0lGPneg", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of a man holding a baby duck 
in his hands", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a dog barks and whimpers", "sucking and grunting followed by slurping with birds in the background"], "sample_ids": ["sShpyu2l4YQ", "yYEVLuqEytU"], "start_seconds": ["0", "40"], "properties": ["barks, whimpers, dog", "grunt, slurp, background"], "captions_pred_video": ["the puppies are playing with a toy", "a baby goat is being petted by a person's hand"], "captions_pred_audio": ["a dog is barking and growling", "several sheep bleat and a man speaks"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "birds chirp and a dog breathes heavily"], "sample_ids": ["w2JXXIAdUdg", "y2ZBGpgbhHM"], "start_seconds": ["10", "30"], "properties": ["emits, sleeping, person", "dog, chirp, breathe"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", null], "captions_pred_audio": ["a person snoring and a dog whimpering", "birds chirping and a dog panting"], "question": "which entity is a person", "label": 0}, {"captions": ["someone sprays liquid onto a hard surface", "a man speaks as a motor runs in the background"], "sample_ids": ["sQwlkXjQabo", "xZepNM9qcRA"], "start_seconds": ["10", "30"], "properties": ["liquid, surface, spray", "background, motor, run"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["spraying followed by silence", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "an airplane engine spools and people speak"], "sample_ids": ["yRx9txMcBl0", "wTjoRj1se3U"], 
"start_seconds": ["40", "390"], "properties": ["accelerates, tires, squeals", "airplane, engine, spool"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a jet engine is running and people are talking"], "question": "which entity is a vehicle", "label": 0}, {"captions": ["people speak as gunfire rings out", "water pouring and bubbling"], "sample_ids": ["wqTCwqVRDlk", "uyRfq-jKPpo"], "start_seconds": ["80", "50"], "properties": ["gunfire, ring, speak", "water, bubbles, pouring"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], 
"captions_pred_audio": ["a man is speaking and a gun is fired", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a car accelerates and wind blows"], "sample_ids": ["w2M4i1mklOA", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["alarm, gears, turn", "accelerates, wind, blows"], "captions_pred_video": ["footage of an antique clock", null], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["water running down a sink while a man is talking", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vSeGhaZt-aI", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["water, sink, talk", "music, gunfire, explosion"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "birds chirp quietly and an adult man speaks"], "sample_ids": ["zF8yoL0rkbI", "zuua6-5goWw"], "start_seconds": ["30", "30"], "properties": ["engine, run, someone", "birds, chirp, quiet, man, speaks"], "captions_pred_video": ["footage of the traffic on the street at night", "in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] 
screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "birds are chirping and a man is speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "water is sprayed across a hard surface"], "sample_ids": ["zY3icUyMdh8", "sQwlkXjQabo"], "start_seconds": ["20", "10"], "properties": ["dog, bark, engine", "water, spray, surface"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["electronic beeps occur in a short series", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["y682ml90jGw", "zj2R0XoFr5k"], "start_seconds": ["11", "50"], "properties": ["beeps, series, electronic", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a beeping sound is being made ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sfAvvZwdLCY", "zFjIWfSD-4"], "start_seconds": ["20", "410"], "properties": ["flushes, drains, water", "People, motor, brakes"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while a car is driving and a ticking sound is heard "], 
"question": "which entity is a machine?", "label": 0}, {"captions": ["a person screams glaringly", "people applaud and hoot and chat quietly"], "sample_ids": ["xC8kbrKJmco", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["glaringly, screams, person", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a goat is bleating ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sQGXqGcwOTc", "vb1fPSDI4c"], "start_seconds": ["3", "30"], "properties": ["cling, speak, dishes", "multiple, people, yell"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", null], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a wooden clack accompanies nearby chirping birds"], "sample_ids": ["weDbePuc-Xc", "yeFvk9x0wWI"], "start_seconds": ["40", "30"], "properties": ["music, slaps, human", "clack, bird, chirp"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "birds chirp in the background as a car drives by "], "question": "which entity has a human sniveling?", "label": 0}, {"captions": ["a stream of water flows quickly", "water flows and trickles"], "sample_ids": ["wbHTKEJZyhc", "tB7hWb9gTuQ"], "start_seconds": ["20", "30"], "properties": ["stream, water, flow", "water, flow, trickle"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the 
background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "water is splashing and gurgling"], "question": "which entity is flowing more slowly", "label": 1}, {"captions": ["small dogs yip and bark sharply", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["v-wcQf4BDY0", "yajyRTUQk3U"], "start_seconds": ["120", "400"], "properties": ["bark, yip, sharply", "a woman, something, fried"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a dog barks and growls", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks and birds chirp in the distance", "a duck quacks and men speak and laugh"], 
"sample_ids": ["uGS7O46tlSo", "tiDFTC-5vU"], "start_seconds": ["70", "30"], "properties": ["a, distance, chirp", "quacks, speak, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["a motorcycle engine is idling", "a machine beeps continuously"], "sample_ids": ["vZAqdHZ81yA", "y682ml90jGw"], "start_seconds": ["180", "11"], "properties": ["engine, motorcycle, idling", "beeps, machine, continuously"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", null], "captions_pred_audio": ["an engine is idling loudly", "a beeping sound is being made "], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "an infant crying frantically"], "sample_ids": ["vJvryTwuAV8", "zwOBqeFTgiU"], "start_seconds": ["16", "30"], "properties": ["audience, cheer, man", "cry, infant, frantically"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a goat screams and people speak in the background", "continuous chugging with birds chirping in the background"], "sample_ids": ["xC8kbrKJmco", "xM4joTqDVp4"], "start_seconds": ["0", "160"], "properties": ["background, goat, scream", "background, chirp, birds"], "captions_pred_video": [null, "footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack"], "captions_pred_audio": ["a goat is bleating ", "birds are chirping and a train is moving "], "question": "which entity has a more natural 
background", "label": 1}, {"captions": ["some clanking with distant murmuring", "a stream of water runs briefly"], "sample_ids": ["uMTTDZ2mb4", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["clanking, murmuring, distant", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vSeGhaZt-aI", "wz7N8YRy74I"], "start_seconds": ["50", "30"], "properties": ["water, sink, talk", "rooster, crow, background, men"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in it?", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["wSVhSdj0F0", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["horn honks, keys jingle, slam", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "a woman speaks happily and an animal chirps"], "sample_ids": ["sSMl2vc3ek", "uWAAAL4CIoc"], "start_seconds": ["20", "0"], "properties": ["loud, multiple, 
distance", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and a dog is barking "], "question": "which entity is quieter", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a series of light horn beeps is followed by a loud steam whistle"], "sample_ids": ["su6FAOcOA8c", "wnpJndXuxLc"], "start_seconds": ["4", "50"], "properties": ["engine, run, woman", "beeps, loud, whistle"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sncRqQ67iJU", "xBxDz0CFVn0"], "start_seconds": ["460", "30"], "properties": ["loud, repeatedly, man", "stream, water, flow"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "footage is blurry and out of focus"], "captions_pred_audio": ["a person is snoring", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a person is whistling", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sIXTftIuUgw", "xKB8O8LTs6s"], "start_seconds": ["90", "70"], "properties": ["person, whistling, person", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a person whistling a song", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["a person is burping while a girl speaks", "an infant crying 
frantically"], "sample_ids": ["vdoxuJn9lTc", "zwOBqeFTgiU"], "start_seconds": ["40", "30"], "properties": ["person, burp, girl", "cry, infant, frantically"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "of the baby crying in the car seat"], "captions_pred_audio": ["a child speaks followed by a burp", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["an engine starts and increases in power", "a person snores loudly multiple times at a close distance"], "sample_ids": ["zjTG0gaGCUI", "sSMl2vc3ek"], "start_seconds": ["80", "20"], "properties": ["power, increase, engine", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a jet engine roars as wind blows ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["someone snores nearby", "a man speaks as a car is passing by"], "sample_ids": ["spJCm8tD9Zo", "sK4u5T8hW78"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "a, car, pass"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["paper folding and crinkling", "crowd applause while a guy laughs followed by another man speaking"], "sample_ids": ["zPpG3RD8lSs", "tDlfY3nmx1A"], "start_seconds": ["20", "160"], "properties": ["paper, 
fold, crinkle", "applause, laugh, man"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "a man in a suit and tie is talking to another man in a suit and tie"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a crowd is clapping and laughing and a man is speaking "], "question": "which entity is a crowd", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["xyx6eNVEYRY", "w5W5Kqtc8E"], "start_seconds": ["380", "100"], "properties": ["loud, engine, muffles", "wind, blow, vehicle"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", null], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "a infant makes noise and is excited"], "sample_ids": ["wvKpEYswXO0", "wIJK3-5y0kA"], "start_seconds": ["150", "30"], "properties": ["water, tap, run", "noise, excited, infant"], "captions_pred_video": ["of the person 
preparing food in the kitchen", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["slZLHwNbbt4", "vbZ-0lGPneg"], "start_seconds": ["300", "30"], "properties": ["a, horn, run", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a horn?", "label": 0}, {"captions": ["a child yells and another yells", "water is sprayed across a hard surface"], "sample_ids": ["vMDHu7Lxcgw", "sQwlkXjQabo"], "start_seconds": ["410", "10"], "properties": ["two, yell, child", "water, spray, surface"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a car accelerates and wind blows"], "sample_ids": ["yaln9y8I7ms", "u0TrcHhkPQ"], "start_seconds": ["230", "20"], "properties": ["female, flushes, toilet", "accelerates, wind, blows"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and a man speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "people applaud and hoot and 
chat quietly"], "sample_ids": ["x5cuQjOdM3E", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["cat, talk, meow", "people, applaud, hoot"], "captions_pred_video": ["a black background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "water pouring and bubbling"], "sample_ids": ["zkKdxzNC97Y", "uyRfq-jKPpo"], "start_seconds": ["27", "50"], "properties": ["loud, bang, noise", "water, bubbles, pouring"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a door is opened and closed", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "a man speaks followed by another man speaking outside"], "sample_ids": ["u6jIvCtKarQ", "viuTg1M-dqg"], "start_seconds": ["70", "30"], "properties": ["a, man, speaks", "two men, speak, follow"], "captions_pred_video": ["footage of a person using a blender on a stove top", "footage of water coming 
out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["zuua6-5goWw", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["birds, chirp, quiet, man, speaks", "three men, wind, flow"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "water is sprayed across a hard surface"], "sample_ids": ["u5RmF3c3Aw", "sQwlkXjQabo"], "start_seconds": ["60", "10"], "properties": ["engine, car, zoom", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, 
and a man speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["wyllXV6PjKo", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["a baby, a woman, a man", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman speaks and a baby cries", "a jet engine roars and wind blows "], "question": "which entity is a person", "label": 0}, {"captions": ["animals bleat and cry out and then a woman speaks", "wind blowing followed by a zoom"], "sample_ids": ["yZp6xizR0yU", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["animal, bleat, cry", "wind, blow, zoom"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "some clanking with distant murmuring"], "sample_ids": ["yYEVLuqEytU", "uMTTDZ2mb4"], "start_seconds": ["40", "30"], "properties": ["animal, pig, background", "clanking, murmuring, distant"], "captions_pred_video": ["a baby goat is being petted by a person's hand", null], "captions_pred_audio": ["several sheep bleat and a man speaks", "people are talking and a car is driving by with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["children cheer as a man speaks then an audience screams", "a mechanical buzzing getting louder"], "sample_ids": ["vJvryTwuAV8", "sEprKHm8Sj8"], "start_seconds": ["16", "90"], "properties": ["audience, cheer, man", "noise, loud, buzzing"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 
2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a race car accelerates and revs its engine "], "question": "which entity is a noise", "label": 1}, {"captions": ["a helicopter engine runs continuously", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["ugHJF0hfYkg", "sLUnaPT5gM8"], "start_seconds": ["10", "0"], "properties": ["engine, running, continuously", "loud, laughter, intermittent"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a helicopter is flying overhead ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is intermittent", "label": 1}, {"captions": ["a duck quacks several times", "some tunes played by whistling"], "sample_ids": ["vh30P49Po6s", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["quacks, duck, several", "tune, play, whistling"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a duck is quacking loudly", "a person whistling a song"], "question": "which is not a musical instrument", "label": 0}, {"captions": ["an engine runs loudly", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vqZuVbG6-HI", "vb1fPSDI4c"], "start_seconds": ["130", "30"], "properties": ["loud, engine, run", "multiple, people, yell"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a crowd of people are talking and laughing"], "question": "which entity is quieter", "label": 1}, {"captions": ["a small 
engine idles continuously", "people applaud and hoot and chat quietly"], "sample_ids": ["y5WII6cTH7k", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["engine, idle, continuously", "people, applaud, hoot"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", null], "captions_pred_audio": ["an engine is knocking and vibrating ", "people are clapping and speaking with background noise "], "question": "which entity is a human activity", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "wind blows and women speak as livestock vocalizes"], "sample_ids": ["y8WEcpOlT3I", "vXlk0lIQBFo"], "start_seconds": ["40", "470"], "properties": ["harsh, wind, blows", "wind, speak, vocalize"], "captions_pred_video": ["on how to use a sewing machine youtube", "- a woman and two donkeys in a fenced in area"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "wind chimes are ringing and people are speaking and laughing "], "question": "which entity has a harsher wind blowing", "label": 0}, {"captions": ["multiple ducks quack continuously", "vehicles pass by on a roadway"], "sample_ids": ["wfHeoPDLMaM", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["multiple, quack, continuously", "pass, vehicle, roadway"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the 
barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage of a fire truck entering a garage"], "captions_pred_audio": ["ducks are quacking", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a child speaks in closed space"], "sample_ids": ["wyllXV6PjKo", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["a baby, a woman, a man", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman speaks and a baby cries", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a child?", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a woman speaks as frying food sizzles"], "sample_ids": ["ziUT9IFTkjg", "wTideSjRFS0"], "start_seconds": ["10", "30"], "properties": ["background, birds, rustling", "food, sizzle, woman"], "captions_pred_video": [null, "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a woman is speaking while water is running in the background"], "question": "which entity is about a woman speaking?", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "small dogs yip and bark sharply"], "sample_ids": ["t25U-v4k4ts", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["a, chirps, bird", "bark, yip, sharply"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a dog barks and growls"], "question": 
"which entity is more active", "label": 1}, {"captions": ["a man speaks as a machine runs", "people speak as gunfire rings out"], "sample_ids": ["vD6lYD1l0BY", "wqTCwqVRDlk"], "start_seconds": ["330", "80"], "properties": ["a, machine, run", "gunfire, ring, speak"], "captions_pred_video": ["game controller being held in the hands of the person", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["someone whistles a tune", "someone whistles a tune"], "sample_ids": ["sIXTftIuUgw", "sIXTftIuUgw"], "start_seconds": ["90", "90"], "properties": ["someone, tune, whistle", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person whistling a song", "a person whistling a song"], "question": "which entity is a person", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "some men converse over an engine running"], "sample_ids": ["vBslzh7saPw", "sCiy7QS1U"], "start_seconds": ["90", "300"], "properties": ["engine, roar, louder", "men, converse, engine"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation over an engine running?", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xKB8O8LTs6s", "tDVADusiIoc"], "start_seconds": ["70", "60"], "properties": ["music, radio, gunshots", "water, radio, man"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["music plays while a woman speaks and 
gunshots are fired ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a woman speaking over a radio?", "label": 0}, {"captions": ["water flows followed by women screaming", "dishes cling together then a man begins to speak"], "sample_ids": ["w5W5Kqtc8E", "sQGXqGcwOTc"], "start_seconds": ["100", "3"], "properties": ["water, flow, women", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a man speaks as a car is passing by"], "sample_ids": ["vlJS7LN2XyM", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["background, clocks, ticking", "a, car, pass"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a dog barks and whimpers", "water splashing and a person laughs in the distance then a man speaks nearby"], "sample_ids": ["sShpyu2l4YQ", "vddP56-ogds"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "water, splash, 
person, laugh"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "water is running and gurgling and a man is speaking"], "question": "which entity is more active", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "a clock ticktocks"], "sample_ids": ["wnpJndXuxLc", "v-g-j2uTByM"], "start_seconds": ["50", "30"], "properties": ["beeps, loud, whistle", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a clock is ticking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman sneezes then speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["x4dZyf9Gbj0", "xKB8O8LTs6s"], "start_seconds": ["130", "70"], "properties": ["sneezes, speaks, woman", "music, gunfire, explosion"], "captions_pred_video": ["footage is blurry and out of focus", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman sneezes and speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a motorcycle engine works nearby", "pigeons vocalize and birds chirp"], "sample_ids": ["tOSWIURC-4", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["engine, work, nearby", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a lawn mower is running ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a vehicle is skidding and squealing tires", "a telephone rings followed by a woman talking"], "sample_ids": ["soTOh3zYJfY", "tGcFnX0GHI"], 
"start_seconds": ["40", "0"], "properties": ["vehicle, skid, tires", "ring, talk, woman"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "someone is snoring while sleeping"], "sample_ids": ["tw76HGONaKg", "ujMt0-D-x2k"], "start_seconds": ["570", "0"], "properties": ["music, click, man", "snore, sleep, someone"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "of the dog playing with a toy on the floor"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a person sniffs and sneezes", "an infant crying frantically"], "sample_ids": ["uRlbY6aoBU", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["sneezes, person, sniffs", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a man is 
sneezing ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "water flows and trickles"], "sample_ids": ["wz7N8YRy74I", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, men", "water, flow, trickle"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a vehicle accelerates and squeals tires"], "sample_ids": ["weDbePuc-Xc", "yRx9txMcBl0"], "start_seconds": ["40", "40"], "properties": ["music, slaps, human", "accelerates, tires, squeals"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a car is revving its engine and skidding "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a door slams shut roughly", "an engine runs loudly"], "sample_ids": ["zkKdxzNC97Y", "vqZuVbG6-HI"], 
"start_seconds": ["27", "130"], "properties": ["a door, slams, shut", "loud, engine, run"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a door is opened and closed", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "a child speaks in closed space"], "sample_ids": ["vfYTJq7nU", "yW6FWLSLkx4"], "start_seconds": ["130", "40"], "properties": ["ducks, quack, man", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a dog barks and whimpers", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sShpyu2l4YQ", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["barks, whimpers, dog", "three men, wind, flow"], "captions_pred_video": ["the puppies are playing with a toy", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "a child speaks in closed space"], "sample_ids": ["ujMt0-D-x2k", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["snoring, rhythmical, nearby", "child, space, speak"], "captions_pred_video": ["of the dog playing with a toy on the floor", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", 
"label": 1}, {"captions": ["water flows as men speak and yell", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vJ7JPEFhyLA", "zl9Dqx-j7q4"], "start_seconds": ["16", "6"], "properties": ["water, flow, men", "engine, laugh, loud"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a car accelerates and wind blows"], "sample_ids": ["ylpYOorfH4o", "u0TrcHhkPQ"], "start_seconds": ["410", "20"], "properties": ["engine, run, loud", "accelerates, wind, blows"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "wind blows as people chatter quietly"], "sample_ids": ["yYEVLuqEytU", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["animal, pig, background", "wind, chatter, people"], "captions_pred_video": ["a baby goat is being petted by a 
person's hand", "footage is blurry and out of focus"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["y2ZBGpgbhHM", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["animal, growl, bird", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vZAw4apG0Es", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["people, clock, converse", "multiple, people, yell"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["y2bVZ7rz-5M", "su6FAOcOA8c"], "start_seconds": ["280", "4"], "properties": ["engine, horn, siren", "engine, idle, woman"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a stream 
of water flows as people talk and wind blows", "multiple people speak and children yell while water gurgles"], "sample_ids": ["xBxDz0CFVn0", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["stream, water, flow", "multiple, people, yell"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vBslzh7saPw", "w5W5Kqtc8E"], "start_seconds": ["90", "100"], "properties": ["power, scream, increase", "wind, blow, vehicle"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a woman and man speak while food is frying"], "sample_ids": ["w2M4i1mklOA", "zk-xJGQU8-4"], "start_seconds": ["30", "130"], "properties": ["alarm, gears, turn", "food, man, woman"], "captions_pred_video": ["footage of an antique clock", "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a woman is speaking while dishes are clanging and music is playing in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "some people speak"], "sample_ids": ["sZPuqDgX2V0", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["commentator, race, track", "some people speak English, some people speak Spanish, some people speak French"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": 
["a man is speaking and a helicopter is flying overhead ", "a woman is speaking and a dog is whimpering"], "question": "which entity is not a race", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "dishes cling together then a man begins to speak"], "sample_ids": ["y2ZBGpgbhHM", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["birds, tweet, pant", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["birds chirping and a dog panting", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "race cars go around a track as a man commentates"], "sample_ids": ["xSKJGCItUWE", "uZesmtKZGSw"], "start_seconds": ["10", "250"], "properties": ["engine, work, child", "car, track, man"], "captions_pred_video": ["footage of the helicopter flying in the room", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has a man commentating?", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "bees buzz and wind blows"], "sample_ids": ["vVhthZ45k3Y", "tMJne1a4AFI"], "start_seconds": ["30", "0"], "properties": ["cat, purr, hiss", 
"bees buzz, wind blows, bees"], "captions_pred_video": ["footage is blurry and out of focus", "a swarm of bees on the ground"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a swarm of bees buzzing around"], "question": "which entity is more active", "label": 1}, {"captions": ["water runs into a sink while men speak", "water pouring and bubbling"], "sample_ids": ["vzceMbklWc", "uyRfq-jKPpo"], "start_seconds": ["180", "50"], "properties": ["water, sink, run", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["water is running and a man is speaking", "water is running from a faucet"], "question": "which entity is more likely to be in a bathroom", "label": 1}, {"captions": ["someone is burping continuously", "a car accelerates and wind blows"], "sample_ids": ["y636gklDioE", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["burps, burps, burps", "accelerates, wind, blows"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", null], "captions_pred_audio": ["a person burps loudly several times", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a motor runs and stops, and animals squawk and 
croak", "wind blowing followed by a zoom"], "sample_ids": ["s4tUs779vBA", "vr8ZXjEBhMQ"], "start_seconds": ["160", "150"], "properties": ["a, sound, stop", "wind, blow, zoom"], "captions_pred_video": ["game in 10 words or less - screenshot 1", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a car is revving and a man is speaking ", "wind blows and a chainsaw cuts through wood "], "question": "which entity has a zoom?", "label": 1}, {"captions": ["water splashes as an animal walks through", "an infant crying frantically"], "sample_ids": ["w1ir-sZ3Im8", "zwOBqeFTgiU"], "start_seconds": ["90", "30"], "properties": ["animal, water, splashes", "cry, infant, frantically"], "captions_pred_video": ["footage of a group of people riding horses through a river", "of the baby crying in the car seat"], "captions_pred_audio": ["water splashes and gurgles as people speak", "a baby cries loudly"], "question": "which entity is a human", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "a man speaks as a car is passing by"], "sample_ids": ["vbpKkWvfOu4", "sK4u5T8hW78"], "start_seconds": ["560", "30"], "properties": ["a, woman, man", "a, car, pass"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking with 
background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "a person snores loudly multiple times at a close distance"], "sample_ids": ["xSKJGCItUWE", "sSMl2vc3ek"], "start_seconds": ["10", "20"], "properties": ["engine, work, child", "loud, multiple, distance"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["male speech with light ticking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xO-Q2BlIIPU", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["male, speech, ticking", "airplane, boy, fly"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["a cat meows and children speak", "a person is burping while a girl speaks"], "sample_ids": ["x5cuQjOdM3E", "vdoxuJn9lTc"], "start_seconds": ["30", "40"], "properties": ["cat, speak, children", "person, burp, girl"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["a cat meows and a woman speaks", "a child speaks followed by a burp"], "question": "which entity is speaking", "label": 1}, {"captions": ["a person is whistling", "wind loudly blowing while people speak in the background followed by a horn blowing"], "sample_ids": ["sIXTftIuUgw", "xjhAnI2q6hM"], 
"start_seconds": ["90", "6"], "properties": ["person, whistling, person", "wind, blow, loudly"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a person whistling a song", "a truck is revving its engine and a man is speaking "], "question": "which entity is blowing", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a machine beeps continuously"], "sample_ids": ["sa6TLVbooCc", "y682ml90jGw"], "start_seconds": ["240", "11"], "properties": ["people, laugh, child", "beeps, machine, continuously"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", null], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "water pouring and bubbling"], "sample_ids": ["su6FAOcOA8c", "uyRfq-jKPpo"], "start_seconds": ["4", "50"], "properties": ["engine, run, woman", "water, bubbles, pouring"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman is speaking and a subway train is 
moving ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["y4tPJXBKDig", "su6FAOcOA8c"], "start_seconds": ["20", "4"], "properties": ["a, noise, talk", "engine, idle, woman"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a woman is speaking and a subway train is moving "], "question": "which entity is a person", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vh30P49Po6s", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["loud, continuous, quacks", "applause, audience, yells"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking and a crowd is clapping"], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone snores nearby", "a toilet flushes and water drains unevenly"], "sample_ids": ["spJCm8tD9Zo", "vhJWZheqaE"], "start_seconds": ["90", "0"], "properties": ["someone snores, nearby, someone", "water drains unevenly, toilet flushes, water drains"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a toilet is flushed"], "question": "which entity is a source of noise", "label": 0}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a train horn blows as it passes by"], "sample_ids": ["y8dSeubCNI", "zVacuqSb4LI"], "start_seconds": ["4", "30"], "properties": 
["men, women, car", "horn, blows, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["an engine revving and people talking in the background", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["an animal quacks rapidly", "wind blows as people chatter quietly"], "sample_ids": ["vh30P49Po6s", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["animal, quacks, rapidly", "wind, chatter, people"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage is blurry and out of focus"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a clang followed by a toilet flushing", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wNZ5thZM7XU", "sSMl2vc3ek"], "start_seconds": ["20", "20"], "properties": ["sound, flush, toilet", "loud, multiple, distance"], "captions_pred_video": ["footage of a toilet in a bathroom stall", null], "captions_pred_audio": ["a toilet flushes", "a person snoring loudly"], "question": "which entity is louder", "label": 
1}, {"captions": ["a small engine spits as it runs", "people cheer as a vehicle engine revs"], "sample_ids": ["sZvwOuuPGP0", "xjhAnI2q6hM"], "start_seconds": ["50", "6"], "properties": ["spits, engine, runs", "engine revs, vehicle, people"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a medium engine is running ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["s4Uz1Ffgo04", "yDoT73BWsdA"], "start_seconds": ["100", "10"], "properties": ["water, rushes, motorcycle", "engine, revs, vehicle"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a man speaks as a motor runs in the background"], "sample_ids": ["yajyRTUQk3U", "xZepNM9qcRA"], "start_seconds": ["400", "30"], "properties": ["noise, woman, speak", "background, motor, run"], "captions_pred_video": ["- a woman cooking in the kitchen", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "water running down a sink while a man is talking"], "sample_ids": ["vKrYfzleLB8", "vSeGhaZt-aI"], "start_seconds": 
["110", "50"], "properties": ["a, ring, gunshots", "water, sink, talk"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a video of a man talking?", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a frog vocalizes while birds chirp"], "sample_ids": ["vXlk0lIQBFo", "vMf1dLD6Sng"], "start_seconds": ["470", "6"], "properties": ["wind, talk, vocalize", "frog, bird, vocalize"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a frog in a pond with pink flowers in the background"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a frog croaks loudly"], "question": "which animal is vocalizing", "label": 1}, {"captions": ["a woman speaks followed by clicks and scraping", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yYJksgsxx5U", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["audio, clicks, scraping", "loud, multiple, distance"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", null], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a woman sneezes then speaks"], "sample_ids": ["xV7Mg1QucSc", "x4dZyf9Gbj0"], "start_seconds": ["14", "130"], "properties": ["alarm, ticktocks, laughs", "sneezes, speaks, woman"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "footage is blurry and out of focus"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a woman sneezes and speaks"], "question": "which entity is a 
woman?", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "a car speeding up in the distance"], "sample_ids": ["vb1fPSDI4c", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["multiple, people, yell", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a crowd of people are talking and laughing", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a muffled toilet flushes and the water drains"], "sample_ids": ["vqZuVbG6-HI", "sfAvvZwdLCY"], "start_seconds": ["130", "20"], "properties": ["background, male, female", "flushes, drains, water"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a toilet is flushed"], "question": "which entity is a toilet?", "label": 1}, {"captions": ["a man speaks as a car is passing by", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["sK4u5T8hW78", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["a, car, pass", "a woman, a television program, a bird"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and a dog is whimpering"], "question": "which entity has 
a bird nearby?", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["s7knHCFW82w", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["blow horn, get close, train", "music, gunfire, explosion"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "wind blows as people chatter quietly"], "sample_ids": ["tw76HGONaKg", "xBxDz0CFVn0"], "start_seconds": ["570", "30"], "properties": ["A, game, keyboard", "wind, chatter, people"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage is blurry and out of focus"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man is speaking with wind noise in the 
background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "a car accelerates and wind blows"], "sample_ids": ["zuua6-5goWw", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["birds, chirp, quiet, man, speaks", "accelerates, wind, blows"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", null], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaking with light rustling", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["zOZleIRqZm4", "xV7Mg1QucSc"], "start_seconds": ["80", "14"], "properties": ["light, rustling, man", "alarm, ticktocks, laughs"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "an alarm clock ticks and a woman laughs"], "question": "which entity is about a clock ticktocking?", "label": 1}, {"captions": ["vehicles pass by on a roadway", "a horn blasts as warning bells ring"], "sample_ids": ["tgbONvsP47Y", "zgUgkpk78xU"], "start_seconds": ["0", "70"], "properties": ["pass, vehicle, roadway", "horn, bells, ring"], "captions_pred_video": ["footage of a fire truck 
entering a garage", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a car is driving on the road ", "a train blows its horn as it speeds down the tracks "], "question": "which entity is a warning", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a child speaks in closed space"], "sample_ids": ["yRx9txMcBl0", "yW6FWLSLkx4"], "start_seconds": ["40", "40"], "properties": ["accelerates, tires, squeals", "child, space, speak"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["water flows as men speak and yell", "an airplane engine runs"], "sample_ids": ["vJ7JPEFhyLA", "yVPZ2MNWpms"], "start_seconds": ["16", "0"], "properties": ["water, flow, men", "engine, airplane, runs"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a 
man is speaking while the wind is blowing and water is splashing", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["tw76HGONaKg", "wz7N8YRy74I"], "start_seconds": ["570", "30"], "properties": ["music, click, man", "rooster, crow, background, men"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more background noise", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["t25U-v4k4ts", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["bees buzz, birds chirp, man speaks", "a, scream, girl"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage of a person spraying paint on the ceiling"], 
"captions_pred_audio": ["a man is speaking and bees are buzzing", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["u7C-AEBQM", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["ticks, rhythmic, quiet", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a ticktock of a clock", "a woman speaks while a helicopter flies overhead "], "question": "which entity is moving", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zkKdxzNC97Y", "wqZ135Ssz0"], "start_seconds": ["27", "60"], "properties": ["hard, surface, door", "two men, woman, birds"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more moving parts", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["x6ijhqRY38s", "tiDFTC-5vU"], "start_seconds": ["250", "30"], "properties": ["something metal, glass, hit", "male, duck, laugh"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck?", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vf9xf3vMsGM", 
"uEU-Hg5MTN8"], "start_seconds": ["540", "27"], "properties": ["A man speaks while turning a water faucet on.", "a woman, laughs, animal"], "captions_pred_video": ["of the person washing their hands under the faucet", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a video of a person speaking?", "label": 0}, {"captions": ["people speak then an engine runs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["uMTTDZ2mb4", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["engine, run, people", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["food is frying then a woman speaks", "a clock ticktocks"], "sample_ids": ["ukxt9I7eMMg", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["food, woman, speak", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "some men converse over an engine running"], "sample_ids": ["xjvTpk2Zpr8", "sCiy7QS1U"], "start_seconds": ["70", "300"], "properties": ["engine, run, wind", "men, converse, engine"], "captions_pred_video": ["footage of a dhl plane landing on the runway", null], "captions_pred_audio": ["a jet engine roars and wind 
blows ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a jet engine spools up and takes off", "a bird chirps in response to a woman chirping for the birds"], "sample_ids": ["vBslzh7saPw", "uOpoD0gGXcs"], "start_seconds": ["90", "120"], "properties": ["engine, spools, takes", "chirps, woman, bird"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a herd of cows grazing in the field"], "captions_pred_audio": ["a jet engine roars and accelerates ", "birds are chirping and a man is speaking"], "question": "which entity is a response to a woman chirping?", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "some men converse over an engine running"], "sample_ids": ["uYT5gxnyMWM", "sCiy7QS1U"], "start_seconds": ["50", "300"], "properties": ["person, spray, yell", "men, converse, engine"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows a person speaking over spraying and another person yelling?", "label": 0}, {"captions": ["water splashes and a motorboat passes as people yell", "a large crowd cheers and applauds"], "sample_ids": ["w5W5Kqtc8E", "rqfQRErjfk8"], "start_seconds": ["100", "170"], "properties": ["water, splashes, motorboat", "crowd, cheers, applauds"], "captions_pred_video": [null, "a man hugging another man in front of an orchestra"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a crowd of people clapping and cheering"], "question": "which entity is more likely to be at a sporting event", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a man laughs and speaks as cats purr and hiss"], "sample_ids": ["u7C-AEBQM", "vVhthZ45k3Y"], "start_seconds": ["30", 
"30"], "properties": ["ticks, rhythmic, quiet", "cat, purr, hiss"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and a cat is meowing"], "question": "which entity is more active", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "several insects fly while two men talk"], "sample_ids": ["uiItxDsDMFI", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["wood, piece, saw", "several, fly, men"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a saw is being used with background noise ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is not being sawed", "label": 1}, {"captions": ["birds vocalize and a man speaks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["v0wPrLBI3hg", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["vocalize, bird, speak", "two men, woman, birds"], "captions_pred_video": ["footage of the pigeons feeding on the ground", null], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more birds", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "a horn rings out as a machine runs by"], "sample_ids": ["rwTERCUno", "slZLHwNbbt4"], "start_seconds": ["90", "300"], "properties": ["engine, idle, sputter", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["an engine is idling and vibrating", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a person screams glaringly", "a man speaks over a 
radio as wind blows and water splashes"], "sample_ids": ["xC8kbrKJmco", "tDVADusiIoc"], "start_seconds": ["0", "60"], "properties": ["glaringly, screams, person", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a goat is bleating ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a infant makes noise and is excited", "water drips and bubbles as a man speaks"], "sample_ids": ["wIJK3-5y0kA", "vSeGhaZt-aI"], "start_seconds": ["30", "50"], "properties": ["noise, excited, infant", "water, bubbles, speak"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a liquid", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a man speaks over intermittent keyboard taps"], "sample_ids": ["sShpyu2l4YQ", "tw76HGONaKg"], "start_seconds": ["0", "570"], "properties": ["growl, bark, yip", "audio, man, keyboard"], "captions_pred_video": ["the puppies are playing with a toy", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time 
guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a dog is barking and growling", "a man speaks and types on a computer keyboard "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "a woman speaks as she rubs two objects together"], "sample_ids": ["t97k0cejSQE", "vzxHnu-SFEw"], "start_seconds": ["250", "80"], "properties": ["bird, chirp, insect", "two objects, woman, speak"], "captions_pred_video": ["a bee on a purple thistle flower", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["an engine runs and wind blows", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vs65y4qmyBE", "uYT5gxnyMWM"], "start_seconds": ["340", "50"], "properties": ["engine, run, wind", "female, spraying, scream"], "captions_pred_video": ["a car is engulfed in flames on the side of the 
road", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a beep occurs briefly", "a car accelerates and wind blows"], "sample_ids": ["xtWeJ56-U-g", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["beep, occur, briefly", "accelerates, wind, blows"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", null], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine runs as a siren and horn sound", "a stream of water runs briefly"], "sample_ids": ["u--KhUW8l1Y", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["sound, vehicle, horn", "stream, water, run"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "small dogs yip and bark sharply"], "sample_ids": ["slZLHwNbbt4", "v-wcQf4BDY0"], "start_seconds": ["300", "120"], "properties": ["clap, distance, horn", "bark, yip, sharply"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a train is moving and blowing its horn with a 
clickety-clack sound ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "an electric engine works nearby followed by a child talking"], "sample_ids": ["zj2R0XoFr5k", "xSKJGCItUWE"], "start_seconds": ["50", "10"], "properties": ["airplane, fly, woman", "engine, work, child"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a high pitched engine is running and a child speaks"], "question": "which entity is a machine", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "people cheer as a vehicle engine revs"], "sample_ids": ["wTjoRj1se3U", "xjhAnI2q6hM"], "start_seconds": ["390", "6"], "properties": ["engine, run, people", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a jet engine is running and people are talking", "a truck is revving its engine and a man is speaking "], "question": "which entity has more people", "label": 1}, {"captions": ["water flows and trickles", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["tB7hWb9gTuQ", "xV7Mg1QucSc"], "start_seconds": ["30", "14"], "properties": ["water, flow, trickle", "alarm, ticktocks, laughs"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["water is splashing and gurgling", "an alarm clock ticks and a woman laughs"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["men speak and a nozzle sprays liquid", "an airplane engine spools and people speak"], "sample_ids": 
["wRV8yMk886E", "wTjoRj1se3U"], "start_seconds": ["0", "390"], "properties": ["liquid, spray, nozzle", "airplane, engine, spool"], "captions_pred_video": ["two cars are parked in a parking lot at night", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "several insects fly while two men talk"], "sample_ids": ["sfAvvZwdLCY", "s-T9OVOiMLo"], "start_seconds": ["20", "330"], "properties": ["flushes, drains, water", "several, fly, men"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video of a toilet flushing?", "label": 0}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["yZmhM1HcsyE", "ziUT9IFTkjg"], "start_seconds": ["4", "10"], "properties": ["engine, roar, water", "background, birds, rustling"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", null], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["s4Uz1Ffgo04", "wqZ135Ssz0"], "start_seconds": ["100", "60"], "properties": ["water, rushes, motorcycle", "two men, woman, birds"], "captions_pred_video": ["footage of an ambulance arriving at the 
scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "pigeons vocalize and birds chirp"], "sample_ids": ["y8WEcpOlT3I", "uiS58TNyUiw"], "start_seconds": ["40", "430"], "properties": ["harsh, wind, blows", "vocalize, bird, chirp"], "captions_pred_video": ["on how to use a sewing machine youtube", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "birds chirp and an owl hoots before a man speaks briefly"], "sample_ids": ["vf9xf3vMsGM", "wRBHTgrbiwg"], "start_seconds": ["540", "50"], "properties": ["A man speaks while turning a water faucet on.", "bird, owl, speak"], "captions_pred_video": ["of the person washing their hands under the faucet", "of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a man is speaking while water is running in the background", "birds are chirping and insects are buzzing"], "question": "which entity has a man speaking to an owl?", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "water bubbles and gurgles."], "sample_ids": ["vb1fPSDI4c", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["multiple, people, yell", "bubbles, gurgles, water"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a crowd of people are talking and laughing", "water is splashing and gurgling"], "question": "which entity has more bubbles", "label": 1}, {"captions": ["an infant crying and a woman speaking with 
some distant murmuring", "a stream of water runs briefly"], "sample_ids": ["smDKStoHBJo", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["a, infant, speaking", "stream, water, run"], "captions_pred_video": ["a man holding a crying baby in his arms", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yajyRTUQk3U", "uEU-Hg5MTN8"], "start_seconds": ["400", "27"], "properties": ["noise, woman, speak", "a woman, laughs, animal"], "captions_pred_video": ["- a woman cooking in the kitchen", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a woman is speaking and a baby is crying"], "question": "which woman is speaking over sizzling noise", "label": 0}, {"captions": ["a woman speaks with water running", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["wTideSjRFS0", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["water, running, woman", "beeps, hit, woman"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a beep 
sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "paper folding and crinkling"], "sample_ids": ["xO-Q2BlIIPU", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["two men, exclamation, speak", "paper, fold, crinkle"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "the wind blows and a mouse clicks "], "question": "which is not a person", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "an airplane engine runs"], "sample_ids": ["vBslzh7saPw", "yVPZ2MNWpms"], "start_seconds": ["90", "0"], "properties": ["engine, roar, louder", "engine, airplane, runs"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a car is driving by on the road "], "question": "which entity is a video of an airplane engine?", 
"label": 0}, {"captions": ["a goat bleats and someone makes a calling noise", "a infant makes noise and is excited"], "sample_ids": ["vlS6YMeWAPo", "wIJK3-5y0kA"], "start_seconds": ["40", "30"], "properties": ["noise, bleat, call", "noise, excited, infant"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a goat bleats and birds chirp", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "several insects fly while two men talk"], "sample_ids": ["xzKKf9bKNUo", "s-T9OVOiMLo"], "start_seconds": ["10", "330"], "properties": ["background, noise, snoring", "several, fly, men"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about insects?", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a stream of water runs briefly"], "sample_ids": ["uYT5gxnyMWM", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["a, scream, girl", "stream, water, run"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a propeller rotates loudly and intensely"], "sample_ids": ["siJFXfGWgDk", "ugHJF0hfYkg"], "start_seconds": ["50", "10"], "properties": ["a, bird, vehicle", "loud, intense, propeller"], "captions_pred_video": ["footage of a beekeeper working 
with bees in a beehive", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "an insect buzzes around continuously"], "sample_ids": ["wTideSjRFS0", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["food, sizzle, woman", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "an infant crying as a woman laughs"], "sample_ids": ["ylpYOorfH4o", "xhmRY9yhC7c"], "start_seconds": ["410", "20"], "properties": ["engine, run, loud", "a, laugh, infant"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", 
"water flows and trickles"], "sample_ids": ["ukxt9I7eMMg", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["continuous, woman, speaking", "water, flow, trickle"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "water is splashing and gurgling"], "question": "which entity is a video of water flowing and trickling?", "label": 1}, {"captions": ["a cat meows and children speak", "plastic is tapped on while someone speaks"], "sample_ids": ["x5cuQjOdM3E", "wvKpEYswXO0"], "start_seconds": ["30", "150"], "properties": ["cat, speak, children", "plastic, tap, speak"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "a man speaks as a motor runs in the background"], "sample_ids": ["xSKJGCItUWE", "xZepNM9qcRA"], "start_seconds": ["10", "30"], "properties": ["engine, work, child", "background, motor, run"], "captions_pred_video": ["footage of the helicopter flying in the room", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a drill runs and two people laugh", "a man speaks while rain falls onto a hard surface"], "sample_ids": ["tEE3MpBt1sg", "wqN6IIHw3po"], "start_seconds": ["50", "30"], "properties": ["two people, laugh, drill", "rain, 
surface, fall"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "in your own words what is happening in this screenshot? blood splattered all over the place"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking and water is splashing"], "question": "which entity is a video of a drill running and two people laughing?", "label": 0}, {"captions": ["a man talks as several small engines run", "water flows and trickles"], "sample_ids": ["u9A6VZQCZpU", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["xzKKf9bKNUo", "vfYTJq7nU"], "start_seconds": ["10", "130"], "properties": ["background, noise, snoring", "rustling, ducks, quack"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", null], "captions_pred_audio": ["a person snoring loudly", "a duck quacks and a woman speaks"], "question": "which entity has more rustling", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["siJFXfGWgDk", "tDlysoZiA1I"], "start_seconds": ["50", "0"], "properties": ["a, bird, vehicle", "animal, grunts, chirps"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a man is speaking 
and birds are chirping in the background ", "birds are chirping and a rooster is crowing "], "question": "which entity has more birds", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "sucking and grunting followed by slurping with birds in the background"], "sample_ids": ["sShpyu2l4YQ", "yYEVLuqEytU"], "start_seconds": ["0", "40"], "properties": ["growl, bark, yip", "grunt, slurp, background"], "captions_pred_video": ["the puppies are playing with a toy", "a baby goat is being petted by a person's hand"], "captions_pred_audio": ["a dog is barking and growling", "several sheep bleat and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["xjvTpk2Zpr8", "y8WEcpOlT3I"], "start_seconds": ["70", "40"], "properties": ["wind, blows, vehicle", "harsh, wind, blows"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vW4x7S1VfQc", "wDVMhEdTiVw"], "start_seconds": ["150", "30"], "properties": ["clacking, oil, woman", "gun, shoot, water"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["food sizzles in a frying pan", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is about shooting something?", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "several insects fly while two men talk"], "sample_ids": ["yYJksgsxx5U", "s-T9OVOiMLo"], 
"start_seconds": ["30", "330"], "properties": ["audio, woman, silverware", "several, fly, men"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a man woman speak while crickets sing", "paper is crumpling consistently"], "sample_ids": ["zTLVJCo4WEE", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["a, crickets, sing", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman speaks and crickets chirp", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "water flows and trickles"], "sample_ids": ["vf9xf3vMsGM", "tB7hWb9gTuQ"], "start_seconds": ["540", "30"], "properties": ["A man speaks while turning a water faucet on.", "water, flow, trickle"], "captions_pred_video": ["of the person washing their hands under the faucet", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking while water is running in the background", "water is splashing and gurgling"], "question": "which entity is a video of water flowing and trickling?", "label": 1}, {"captions": ["people speak and tapping occurs", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tFCUUGdREgA", "sSMl2vc3ek"], "start_seconds": ["70", "20"], "properties": ["people, tap, speak", "loud, multiple, distance"], "captions_pred_video": ["a person riding a white horse in an indoor arena", null], "captions_pred_audio": ["a man is speaking 
and walking with wind noise in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a person sniffles and sneezes", "birds chirp and objects are moved around"], "sample_ids": ["uRlbY6aoBU", "yPUYU6t3rwo"], "start_seconds": ["0", "370"], "properties": ["sneezes, sniffles, person", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is sneezing ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["wind blows and a stream of water flows nearby", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sYITalLZjj4", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["stream, flow, wind", "music, gunfire, explosion"], "captions_pred_video": ["two ducks are swimming in the water near each other", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["wind blows and birds chirp", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["sofxkNWaP0s", "t97k0cejSQE"], "start_seconds": ["30", "250"], "properties": ["wind, engine, louder", "sound, chirp, buzz"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "a bee on a purple thistle flower"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a bee buzzes and a woman speaks"], "question": "which entity has a higher pitch", "label": 1}, {"captions": ["a stream of water flows quickly", "a car speeding up in the distance"], "sample_ids": ["wbHTKEJZyhc", "u0TrcHhkPQ"], "start_seconds": 
["20", "20"], "properties": ["stream, water, flow", "distance, car, speed"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", null], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["wind blows strongly", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["w8uLijTqtlU", "xKB8O8LTs6s"], "start_seconds": ["70", "70"], "properties": ["wind, blows, strongly", "music, gunfire, explosion"], "captions_pred_video": ["footage is blurry and shaky", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["the wind is blowing strongly", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a 
door opens and birds chirp", "people cheer as a vehicle engine revs"], "sample_ids": ["yeFvk9x0wWI", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["door, open, birds", "engine revs, vehicle, people"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a truck is revving its engine and a man is speaking "], "question": "which entity is more likely to be in a vehicle?", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a motorcycle idles loudly as wind blows"], "sample_ids": ["tDVADusiIoc", "v7jJS8aAyA"], "start_seconds": ["60", "10"], "properties": ["wind, radio, waves", "wind, blows, loudly"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a motorcycle engine is idling and vibrating"], "question": "which entity is moving through the water", "label": 0}, {"captions": ["someone is snoring while sleeping", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["ujMt0-D-x2k", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["snore, sleep, someone", "music, gunfire, explosion"], "captions_pred_video": ["of the dog playing with a toy on the floor", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a person is snoring loudly", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["zkKdxzNC97Y", "rqu8iB22IY"], "start_seconds": ["27", "5"], "properties": ["loud, 
bang, noise", "sound, repeats, laugh"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a dog barks and a man speaks while music plays "], "question": "which entity has a loud bang followed by a softer banging noise?", "label": 0}, {"captions": ["a child babbles as a woman speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wEBlkGWVWwE", "vJ7JPEFhyLA"], "start_seconds": ["260", "16"], "properties": ["a, babble, woman", "three men, wind, flow"], "captions_pred_video": ["shows a person writing on the whiteboard", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "vehicles pass by on a roadway"], "sample_ids": ["spJCm8tD9Zo", "tgbONvsP47Y"], "start_seconds": ["90", "0"], "properties": ["snores, wheezes, sleeps", "pass, vehicle, roadway"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a person is snoring loudly", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "someone whistles a tune"], "sample_ids": ["sSMl2vc3ek", "sIXTftIuUgw"], "start_seconds": ["20", "90"], "properties": ["loud, multiple, distance", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person snoring loudly", "a person whistling a song"], "question": "which is not a musical instrument", "label": 0}, {"captions": ["a person snores loudly multiple times at a close distance", "a car accelerates and wind blows"], "sample_ids": ["sSMl2vc3ek", 
"u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["loud, multiple, distance", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person snoring loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tjmoSi330GM", "zj2R0XoFr5k"], "start_seconds": ["23", "50"], "properties": ["speed, water, boat", "airplane, boy, fly"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "ticking continues without interruption"], "sample_ids": ["vVhthZ45k3Y", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["cat, purr, hiss", "ticking, continuous, clock"], "captions_pred_video": ["footage is blurry and out of focus", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a clock is ticking loudly"], "question": "which entity is a clock", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "an infant crying as a woman laughs"], "sample_ids": ["sWZzXuWYY", "xhmRY9yhC7c"], "start_seconds": ["420", "20"], "properties": ["male, clanks, thumps", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a 
toilet flushes and water drains"], "sample_ids": ["zFjIWfSD-4", "sfAvvZwdLCY"], "start_seconds": ["410", "20"], "properties": ["People, motor, brakes", "water drains, flushes, water"], "captions_pred_video": [null, "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "a woman speaks happily and an animal chirps"], "sample_ids": ["s7knHCFW82w", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["blow horn, get close, train", "a woman, chirps, animal"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", null], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a woman is speaking and a dog is barking "], "question": "which entity is a person", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["uPDn2BFTHk", "w34HjHr6gAY"], "start_seconds": ["140", "30"], "properties": ["lady, laugh, baby", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, 
{"captions": ["women speak as water runs briefly, children call out, and a man speaks", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["uRExseg-0XI", "y2bVZ7rz-5M"], "start_seconds": ["210", "280"], "properties": ["woman, man, water", "motor noise, horn, siren"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn honking?", "label": 1}, {"captions": ["humming and rattling of an engine idling as it revs", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["xMXvkIcaG0Y", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["sound, humming, rattling", "engine, revs, vehicle"], "captions_pred_video": ["footage of a car's hood being opened up to reveal the engine underneath the hood", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["an engine is revving and accelerating ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a person speaks briefly", "paper folding and crinkling"], "sample_ids": ["zOZleIRqZm4", "zPpG3RD8lSs"], "start_seconds": ["80", "20"], "properties": ["person, talk, brief", "paper, fold, crinkle"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper 
youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "the wind blows and a mouse clicks "], "question": "which is not a person", "label": 1}, {"captions": ["heavy rain splashes as it falls", "a rumbling clap in the distance followed by a horn and the rumbling grows louder"], "sample_ids": ["wP8ZKrlx3oA", "slZLHwNbbt4"], "start_seconds": ["40", "300"], "properties": ["fall, rain, splash", "clap, distance, horn"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is not a splash", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["xfudFO976zE", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["animal, bleats, cry", "male, duck, laugh"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["distant humming of an engine", "a stream of water runs briefly"], "sample_ids": ["yVPZ2MNWpms", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["sound, distance, engine", "stream, water, run"], "captions_pred_video": 
["footage of an airport with planes parked on the tarmac", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a car is driving by on the road ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man talks as several small engines run", "a telephone rings followed by a woman talking"], "sample_ids": ["u9A6VZQCZpU", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["a, man, talk", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["birds chirps while a siren signals in the distance", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["uKCSGgof8gI", "vfYTJq7nU"], "start_seconds": ["12", "130"], "properties": ["chirps, distance, signal", "rustling, ducks, quack"], "captions_pred_video": ["footage of a street in a small town on a sunny day", null], "captions_pred_audio": ["a truck is accelerating and revving its engine ", "a duck quacks and a woman speaks"], "question": "which entity is about ducks?", "label": 1}, {"captions": ["a small engine idles continuously", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["y5WII6cTH7k", "xfaoyyzw2WU"], "start_seconds": ["40", "180"], "properties": ["engine, idle, continuously", "loud, jet engine, roar"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["an engine is knocking and vibrating ", "an aircraft engine roars and a man speaks "], "question": "which engine is louder", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "water flows as men speak and yell"], 
"sample_ids": ["sOa7g-44Dag", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["audio, scratching, man", "water, flow, men"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["animals bleat and cry out and then a woman speaks", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["yZp6xizR0yU", "wyllXV6PjKo"], "start_seconds": ["30", "30"], "properties": ["animal, bleat, cry", "a baby, a woman, a man"], "captions_pred_video": ["footage of a woman feeding goats in a barn", null], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a woman speaks and a baby cries"], "question": "which entity is a human", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a vehicle accelerates and squeals tires"], "sample_ids": ["xSKJGCItUWE", "yRx9txMcBl0"], "start_seconds": ["10", "40"], "properties": ["engine, run, boy", "accelerates, tires, squeals"], "captions_pred_video": ["footage of the helicopter flying in the room", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a high pitched engine is running and a child 
speaks", "a car is revving its engine and skidding "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sfAvvZwdLCY", "tiDFTC-5vU"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "male, duck, laugh"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["food is frying then a woman speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["ukxt9I7eMMg", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["food, woman, speak", "rooster, crow, background, men"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in it?", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["u6jIvCtKarQ", "zj2R0XoFr5k"], "start_seconds": ["70", "50"], "properties": ["a, man, speaks", "airplane, boy, fly"], "captions_pred_video": ["footage of a person using a blender on a stove top", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["wind blows strongly", "two men and a woman talk while wind blows and birds 
tweet"], "sample_ids": ["w8uLijTqtlU", "wqZ135Ssz0"], "start_seconds": ["70", "60"], "properties": ["wind, blows, strongly", "two men, woman, birds"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["x9JovgqUcs", "sLUnaPT5gM8"], "start_seconds": ["500", "0"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["young female child snoring and breathing deeply", "bees buzz and wind blows"], "sample_ids": ["sAam2NqGhLY", "tMJne1a4AFI"], "start_seconds": ["20", "0"], "properties": ["snoring, breathing, child", "bees buzz, wind blows, bees"], "captions_pred_video": ["of a little girl sleeping on a couch", "a swarm of bees on the ground"], "captions_pred_audio": ["a person is snoring", "a swarm of bees buzzing around"], "question": "which entity is moving", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "some men converse over an engine running"], "sample_ids": ["xyx6eNVEYRY", "sCiy7QS1U"], "start_seconds": ["380", "300"], "properties": ["loud, engine, muffles", "men, converse, engine"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", null], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows a man speaking?", 
"label": 0}, {"captions": ["a vehicle engine revs and tires squeal", "a car speeding up in the distance"], "sample_ids": ["yDoT73BWsdA", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["engine revs, tires squeal, vehicle", "distance, car, speed"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a race car accelerates and revs its engine "], "question": "which vehicle is moving faster", "label": 1}, {"captions": ["a clock ticktocks in wind", "a clock ticktocks"], "sample_ids": ["yVumC9TGknc", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, wind", "ticktocks, clock, ticktocks"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a series of beeps and chirps", "a clock is ticking loudly"], "question": "which clock ticktocks", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["wudZTNBtVqc", "y8WEcpOlT3I"], "start_seconds": ["60", "40"], "properties": ["accelerates, engine, wind", "harsh, wind, blows"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking with wind noise in the background "], "question": "which entity is a video of a vehicle engine accelerating and wind blowing?", "label": 0}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "roadway noise occurs and a truck accelerates"], "sample_ids": ["yZmhM1HcsyE", "tgbONvsP47Y"], "start_seconds": ["4", "0"], "properties": ["engine, roar, water", "noise, truck, accelerate"], "captions_pred_video": ["footage of a speedboat on a lake with water 
spraying from the back of the boat", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["humming and rattling of an engine idling as it revs", "people cheer as a vehicle engine revs"], "sample_ids": ["xMXvkIcaG0Y", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["sound, humming, rattling", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a car's hood being opened up to reveal the engine underneath the hood", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["an engine is revving and accelerating ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle engine revs?", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "pigeons vocalize and birds chirp"], "sample_ids": ["xjvTpk2Zpr8", "uiS58TNyUiw"], "start_seconds": ["70", "430"], "properties": ["engine, run, wind", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "of the pigeon in the cage"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a woman speaks and is crumpling paper", "pigeons vocalize and birds chirp"], "sample_ids": ["xvDdE3zNf8Y", "uiS58TNyUiw"], "start_seconds": ["120", "430"], "properties": ["A, crumple, paper", "vocalize, bird, chirp"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "of the pigeon in the cage"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "a door opens 
and closes"], "sample_ids": ["zj2R0XoFr5k", "vBHyYJ8pL0"], "start_seconds": ["50", "2"], "properties": ["airplane, fly, woman", "open, close, door"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", null], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is stationary", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["yajyRTUQk3U", "zj2R0XoFr5k"], "start_seconds": ["400", "50"], "properties": ["noise, woman, speak", "airplane, boy, fly"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "people cheer as a vehicle engine revs"], "sample_ids": ["vK93VuO0yNc", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["male voice, bus, rumble", "engine revs, vehicle, people"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a man speaks as crickets sing", "multiple motorcycles pass by as a man speaks"], "sample_ids": ["ryFDPxgDOGc", "zcDwZ6W7E3E"], "start_seconds": ["570", "180"], "properties": ["a, crickets, sing", "man, speak, motorcycles"], 
"captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "2 people riding motorcycles down a mountain road with trees lining the sides of the road"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking while a car accelerates and revs its engine "], "question": "which entity has a man speaking as multiple motorcycles pass by?", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vBHyYJ8pL0", "vYkA3cfXp5Q"], "start_seconds": ["2", "30"], "properties": ["noise, door, opening", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "an engine is idling"], "question": "which entity is a vehicle", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "someone is typing on a computer keyboard"], "sample_ids": ["ukxt9I7eMMg", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["continuous, woman, speaking", "keyboard, type, computer"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a person is typing on a keyboard"], "question": "which is a video", "label": 1}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "a car speeding up in the distance"], "sample_ids": ["wRBHTgrbiwg", "u0TrcHhkPQ"], "start_seconds": ["50", "20"], "properties": ["birds, chirp, cooing", "distance, car, speed"], "captions_pred_video": ["of a bee pollinating the flowers in the field", null], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a race car accelerates and revs its 
engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a person burps loudly for a long time nearby", "a woman speaks over sizzling noise"], "sample_ids": ["vf44CgrjT0A", "yajyRTUQk3U"], "start_seconds": ["20", "400"], "properties": ["loud, long, person", "noise, woman, speak"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a loud burp", "a woman is speaking while food is frying in the background"], "question": "which entity is quieter", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "a man speaks as birds chirp and a vehicle passes nearby"], "sample_ids": ["xERFUeZONz8", "siJFXfGWgDk"], "start_seconds": ["0", "50"], "properties": ["ring, approach, traffic", "a, bird, vehicle"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["an emergency vehicle siren blares", "a man is speaking and birds are chirping in the background "], "question": "which entity has a vehicle passing nearby?", "label": 1}, {"captions": ["water pouring and bubbling", "dishes cling together then a man begins to speak"], "sample_ids": ["uyRfq-jKPpo", "sQGXqGcwOTc"], "start_seconds": ["50", "3"], "properties": ["water, bubbles, pouring", "cling, speak, dishes"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["water is running from a faucet", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["people speak in a closed space", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["sTpirNYo8vQ", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["people, space, speak", "water, radio, man"], "captions_pred_video": ["of a man taking a selfie on a bus", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be in a closed space", "label": 0}, {"captions": ["a toilet flushes and a female speaks", "wind blowing followed by a zoom"], "sample_ids": ["yaln9y8I7ms", "vr8ZXjEBhMQ"], "start_seconds": ["230", "150"], "properties": ["female, flushes, toilet", "wind, blow, zoom"], "captions_pred_video": ["footage is blurry and out of focus", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a toilet flushes and a man speaks", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to be a video of a toilet flushing?", "label": 0}, {"captions": ["scraping and female speech with distant music", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yHeVV-xeOxQ", "zl9Dqx-j7q4"], "start_seconds": ["130", "6"], "properties": ["female, speech, music", "engine, laugh, loud"], "captions_pred_video": ["of a girl milking a goat's udder", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and 
scraping sounds are heard in the background ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "vehicles pass by on a roadway"], "sample_ids": ["xyx6eNVEYRY", "tgbONvsP47Y"], "start_seconds": ["380", "0"], "properties": ["loud, engine, muffles", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "footage of a fire truck entering a garage"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a car is driving on the road "], "question": "which vehicle is moving", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a car speeding up in the distance"], "sample_ids": ["uYT5gxnyMWM", "u0TrcHhkPQ"], "start_seconds": ["50", "20"], "properties": ["a, scream, girl", "distance, car, speed"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a person is whistling", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sIXTftIuUgw", "vb1fPSDI4c"], "start_seconds": ["90", "30"], "properties": ["person, whistling, person", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person whistling a song", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "people applaud and hoot and chat quietly"], "sample_ids": ["sU53zg9Jp7s", "wwyfGO2J4"], "start_seconds": ["380", "90"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "people, applaud, hoot"], "captions_pred_video": ["a cartoon girl is standing in front of a blue 
couch", null], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a woman speaks as she rubs two objects together"], "sample_ids": ["yZmhM1HcsyE", "vzxHnu-SFEw"], "start_seconds": ["4", "80"], "properties": ["engine, roar, water", "two objects, woman, speak"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a machine?", "label": 0}, {"captions": ["a man speaks while water drains", "motors rev and run loudly as a person laughs"], "sample_ids": ["vSeGhaZt-aI", "zl9Dqx-j7q4"], "start_seconds": ["50", "6"], "properties": ["water, drain, man", "motors rev, laugh, loudly"], 
"captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a jet engine roars "], "question": "which entity is more quiet", "label": 0}, {"captions": ["people speak softly as food sizzles", "dishes cling together then a man begins to speak"], "sample_ids": ["yhQ2Lg-7qDY", "sQGXqGcwOTc"], "start_seconds": ["130", "3"], "properties": ["food, sizzle, speak", "cling, speak, dishes"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a faucet is running and a man is speaking", "mechanisms are operating and water is splashing "], "question": "which entity is about speaking?", "label": 0}, {"captions": ["a woman speaks as frying food sizzles", "a crowd yells, reacts and applauds"], "sample_ids": ["wTideSjRFS0", "wztCSUxOf8"], "start_seconds": ["30", "130"], "properties": ["food, sizzle, woman", "a crowd, yells, applauds"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "paper folding and crinkling"], "sample_ids": ["yDoT73BWsdA", "zPpG3RD8lSs"], "start_seconds": ["10", "20"], "properties": ["engine, revs, vehicle", "paper, fold, crinkle"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a 
valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "the wind blows and a mouse clicks "], "question": "which is not a vehicle", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "a clock ticktocks"], "sample_ids": ["wTideSjRFS0", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["food, sizzle, woman", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["multiple insects buzz over rustling wind", "a car accelerates and wind blows"], "sample_ids": ["tMJne1a4AFI", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["wind, buzz, rustling", "accelerates, wind, blows"], "captions_pred_video": ["a swarm of bees on the ground", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "children cheer as a man speaks then an audience screams"], "sample_ids": ["wqZ135Ssz0", "vJvryTwuAV8"], "start_seconds": ["60", "16"], "properties": ["two men, woman, birds", "audience, cheer, man"], 
"captions_pred_video": [null, "a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a man is speaking and a crowd is shouting and whooping "], "question": "which entity has more people", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "an engine starts and increases in power"], "sample_ids": ["smDKStoHBJo", "zjTG0gaGCUI"], "start_seconds": ["0", "80"], "properties": ["a, talk, baby, cry", "power, increase, engine"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a jet engine roars as wind blows "], "question": "which entity is a moving object", "label": 1}, {"captions": ["paper folding and crinkling", "water flows and trickles"], "sample_ids": ["zPpG3RD8lSs", "tB7hWb9gTuQ"], "start_seconds": ["20", "30"], "properties": ["paper, fold, crinkle", "water, flow, trickle"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "the rocks on the beach are surrounded by water and the sky is visible in the 
background"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "water is splashing and gurgling"], "question": "which entity is more likely to flow", "label": 1}, {"captions": ["an emergency siren wails as it passes", "an infant crying as a woman laughs"], "sample_ids": ["vGj1XLJvNrw", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["wails, wails, pass", "a, laugh, infant"], "captions_pred_video": ["footage of a police car driving down a city street", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["un9VQlzgZM", "su6FAOcOA8c"], "start_seconds": ["5", "4"], "properties": ["wind, speak, laugh", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "a frog croaks as other frogs croak in the background"], "sample_ids": ["sYITalLZjj4", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["water, rushes, background, birds", "background, frog, croak"], "captions_pred_video": ["two ducks are swimming in the water near each other", "a close up of a frog in the water"], "captions_pred_audio": ["wind blows and birds chirp", "a frog is croaking"], "question": "which entity is a croaking animal?", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["voJh2gJxXhA", "w34HjHr6gAY"], 
"start_seconds": ["50", "30"], "properties": ["music, frog, croak", "beeps, hit, woman"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["music is playing and crickets are chirping ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "winds blows roughly as a vehicle races past"], "sample_ids": ["wDVMhEdTiVw", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["gun, shoot, water", "wind, blows, vehicle"], "captions_pred_video": ["a blurry image of trees and water in the forest", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a man makes an exclamation, then another man speaks", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["xO-Q2BlIIPU", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["two men, exclamation, speak", "beeps, hit, woman"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 
the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["uiItxDsDMFI", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["wood, piece, saw", "a woman, laughs, animal"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a saw is being used with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "a telephone rings followed by a woman talking"], "sample_ids": ["x9JovgqUcs", "tGcFnX0GHI"], "start_seconds": ["500", "0"], "properties": ["a, man, speaks, keyboard", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a keyboard", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a motorcycle engine is idling", "small dogs yip and bark sharply"], "sample_ids": ["vZAqdHZ81yA", "v-wcQf4BDY0"], "start_seconds": ["180", "120"], "properties": ["engine, motorcycle, idling", "bark, yip, sharply"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["an engine is idling loudly", "a dog barks 
and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a toilet door squeaks as it is opened", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sdXV-ylviw", "vJ7JPEFhyLA"], "start_seconds": ["190", "16"], "properties": ["door, toilet, squeaks", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a still image?", "label": 0}, {"captions": ["multiple ducks quack continuously", "dishes cling together then a man begins to speak"], "sample_ids": ["wfHeoPDLMaM", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["multiple, quack, continuously", "cling, speak, dishes"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["ducks are quacking", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a man speaks as a boat engine runs", 
"people speak in the background as a clock ticktocks"], "sample_ids": ["wtDqrBygTcU", "vZAw4apG0Es"], "start_seconds": ["30", "30"], "properties": ["man, engine, run", "background, clock, ticktocks"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a man is speaking and a motor is running", "a clock is ticking and people are talking"], "question": "which entity has a clock ticking in the background?", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "an airplane engine spools and people speak"], "sample_ids": ["uC9dtII1KDI", "wTjoRj1se3U"], "start_seconds": ["150", "390"], "properties": ["wind, gusts, distance", "airplane, engine, spool"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["a clock ticktocks continuously", "a man speaks over a running engine and blowing wind"], "sample_ids": ["vlJS7LN2XyM", "ylpYOorfH4o"], "start_seconds": ["30", "410"], "properties": ["ticktocks, clock, ticktocks continuously", "engine, running, wind"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 
youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and an engine is revving"], "question": "which entity is not a clock?", "label": 1}, {"captions": ["heavy rain splashes as it falls", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["wP8ZKrlx3oA", "w5W5Kqtc8E"], "start_seconds": ["40", "100"], "properties": ["fall, rain, splash", "wind, blow, vehicle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", null], "captions_pred_audio": ["a heavy rain is falling on a surface", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a man makes an exclamation, then another man speaks", "a man speaks as a car is passing by"], "sample_ids": ["xO-Q2BlIIPU", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["two men, exclamation, speak", "a, car, pass"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["there are rhythmical snoring nearby", 
"an insect buzzes around continuously"], "sample_ids": ["ujMt0-D-x2k", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["snoring, rhythmical, nearby", "buzzes, continuously, insect"], "captions_pred_video": ["of the dog playing with a toy on the floor", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a person is snoring loudly", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["rustling with distant murmuring", "an engine runs loudly"], "sample_ids": ["wnNNcxAPwGQ", "vqZuVbG6-HI"], "start_seconds": ["0", "130"], "properties": ["sound, distance, rustling", "loud, engine, run"], "captions_pred_video": ["footage of a yellow truck doing a burnout on a race track", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a crowd of people are talking and laughing while a skateboard rolls by ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp and objects are moved around", "roadway noise occurs and a truck accelerates"], "sample_ids": ["yPUYU6t3rwo", "tgbONvsP47Y"], "start_seconds": ["370", "0"], "properties": ["birds chirp, objects are moved around, birds", "noise, truck, accelerate"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "footage of a fire truck entering a garage"], "captions_pred_audio": ["insects buzz and a man speaks", "a car is driving on the road "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a weapon fires multiple times", "wind blowing followed by a zoom"], "sample_ids": ["sMC07Ucy7kg", "vr8ZXjEBhMQ"], "start_seconds": ["10", "150"], "properties": ["weapon, fire, multiple", "wind, blow, zoom"], "captions_pred_video": ["footage is from a car's point of view", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["people are clapping and speaking with 
background noise ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a weapon", "label": 1}, {"captions": ["a duck quacks several times", "a woman speaks as she rubs two objects together"], "sample_ids": ["vh30P49Po6s", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["quacks, duck, several", "two objects, woman, speak"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is speaking", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "people cheer as a vehicle engine revs"], "sample_ids": ["wAAkbZToh8", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["burp, laugh, speak", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man burps and a woman speaks", "a truck is revving its engine and a man is speaking "], "question": 
"which entity is about a vehicle?", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "a clock ticktocks"], "sample_ids": ["wudZTNBtVqc", "v-g-j2uTByM"], "start_seconds": ["60", "30"], "properties": ["accelerates, engine, wind", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["frogs croak and vocalize", "a stream of water flows as people talk and wind blows"], "sample_ids": ["yswmmRZFItk", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["croak, vocalize, frog", "stream, water, flow"], "captions_pred_video": ["a close up of a frog in the water", "footage is blurry and out of focus"], "captions_pred_audio": ["a frog is croaking", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vlS6YMeWAPo", "tiDFTC-5vU"], "start_seconds": ["40", "30"], "properties": ["noise, bleat, call", "male, duck, laugh"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", null], "captions_pred_audio": ["a goat bleats and birds chirp", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["an aircraft engine runs", "a power tool runs and touches a surface"], "sample_ids": ["yLCORCnd35Q", "zfvPRf3chY"], "start_seconds": ["0", "290"], "properties": ["engine, aircraft, runs", "power tool, run, touch"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", null], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a man is speaking while a power tool is 
being used "], "question": "which entity is a machine", "label": 1}, {"captions": ["a clock ticktocks continuously", "small dogs yip and bark sharply"], "sample_ids": ["vlJS7LN2XyM", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["ticktocks, clock, ticktocks continuously", "bark, yip, sharply"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a ticktock of a clock", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a young woman speaks over spraying and another person yells", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["uYT5gxnyMWM", "wDVMhEdTiVw"], "start_seconds": ["50", "30"], "properties": ["person, spray, yell", "gun, shoot, water"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["an airplane engine runs", "an infant crying as a woman laughs"], "sample_ids": ["yVPZ2MNWpms", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["engine, airplane, runs", "a, laugh, infant"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a car is driving by on the road ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zsLxS-uLJTw", "yajyRTUQk3U"], "start_seconds": ["20", "400"], "properties": 
["horn, blast, train", "a woman, something, fried"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of a woman talking?", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a whistling owl calls out repeatedly and insects screech"], "sample_ids": ["yajyRTUQk3U", "w6RTHR6AeAg"], "start_seconds": ["400", "40"], "properties": ["a woman, something, fried", "call, owl, screech"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "an owl hoots and mechanisms operate "], "question": "which entity is a bird?", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "a man speaks as a car is passing by"], "sample_ids": ["vs65y4qmyBE", "sK4u5T8hW78"], "start_seconds": ["340", "30"], "properties": ["engine, run, man", "a, car, pass"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking while a machine engine runs?", "label": 0}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "an infant crying 
frantically"], "sample_ids": ["zY3icUyMdh8", "zwOBqeFTgiU"], "start_seconds": ["20", "30"], "properties": ["dog, bark, engine", "cry, infant, frantically"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "of the baby crying in the car seat"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a stream of water runs briefly"], "sample_ids": ["yZmhM1HcsyE", "x-PeY8Yb8M4"], "start_seconds": ["4", "300"], "properties": ["engine, roar, water", "stream, water, run"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "frogs croak and vocalize"], "sample_ids": ["sU53zg9Jp7s", "yswmmRZFItk"], "start_seconds": ["380", "0"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "croak, vocalize, frog"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "a close up of a frog in the water"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a frog is croaking"], "question": "which animal is vocalizing", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yVumC9TGknc", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["humming, clock, birds", "water, radio, man"], "captions_pred_video": ["game title screen of the game shadow of the 
colossus on sony playstation 2", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a series of beeps and chirps", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a clock?", "label": 0}, {"captions": ["a man talks while a clock does ticktock", "a child speaks in closed space"], "sample_ids": ["spYNpeN7rPY", "yW6FWLSLkx4"], "start_seconds": ["1", "40"], "properties": ["a clock, ticktock, man", "child, space, speak"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["humming and rattling of an engine idling as it revs", "a stream of water runs briefly"], "sample_ids": ["xMXvkIcaG0Y", "x-PeY8Yb8M4"], 
"start_seconds": ["30", "300"], "properties": ["sound, humming, rattling", "stream, water, run"], "captions_pred_video": ["footage of a car's hood being opened up to reveal the engine underneath the hood", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["an engine is revving and accelerating ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "a child speaks in closed space"], "sample_ids": ["wDVMhEdTiVw", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["gun, shoot, water", "child, space, speak"], "captions_pred_video": ["a blurry image of trees and water in the forest", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "paper folding and crinkling"], "sample_ids": ["xvDdE3zNf8Y", "zPpG3RD8lSs"], "start_seconds": ["120", "20"], "properties": ["A, crumple, paper", "paper, fold, crinkle"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a 
piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a woman speaks and crumples paper", "the wind blows and a mouse clicks "], "question": "which entity is crumpling paper", "label": 0}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["wjsXBsc7M40", "w5W5Kqtc8E"], "start_seconds": ["10", "100"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "wind, blow, vehicle"], "captions_pred_video": ["footage of the baby playing with a toothbrush", null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a baby laughing?", "label": 0}, {"captions": ["a door opens and closes", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vBHyYJ8pL0", "y8WEcpOlT3I"], "start_seconds": ["2", "40"], "properties": ["open, close, door", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a man is speaking with wind noise in the background "], "question": "which entity is more likely to be a door", "label": 0}, {"captions": ["a machine clanks and thumps and a male speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["sWZzXuWYY", "xZepNM9qcRA"], "start_seconds": ["420", "30"], "properties": ["male, clanks, thumps", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, 
{"captions": ["a dog barks and whimpers", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sShpyu2l4YQ", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["barks, whimpers, dog", "a woman, something, fried"], "captions_pred_video": ["the puppies are playing with a toy", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a car speeding up in the distance"], "sample_ids": ["vbZ-0lGPneg", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["a woman, a television program, a bird", "distance, car, speed"], "captions_pred_video": ["of a man holding a baby duck in his hands", null], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["xfudFO976zE", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["animal, bleats, cry", "a woman, laughs, animal"], "captions_pred_video": ["footage is blurry and shaky", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "a man speaks while turning a water faucet on"], "sample_ids": ["vZAw4apG0Es", "vf9xf3vMsGM"], "start_seconds": ["30", "540"], "properties": ["background, clock, ticktocks", "A man speaks while turning a water faucet on."], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", 
"of the person washing their hands under the faucet"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking while water is running in the background"], "question": "which entity is more active", "label": 1}, {"captions": ["birds vocalize and a man speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["v0wPrLBI3hg", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["vocalize, bird, speak", "multiple, people, yell"], "captions_pred_video": ["footage of the pigeons feeding on the ground", null], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a woman talks while something is fried and objects are tapped", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["yajyRTUQk3U", "uZesmtKZGSw"], "start_seconds": ["400", "250"], "properties": ["a woman, something, fried", "men, talk, cars"], "captions_pred_video": ["- a woman cooking in the kitchen", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about cars?", "label": 1}, {"captions": ["a machine runs continuously", "vehicle engines race around a track as a man commentates"], "sample_ids": 
["wdXV3Pv0jiY", "sZPuqDgX2V0"], "start_seconds": ["11", "30"], "properties": ["machine, running, continuously", "commentator, race, track"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a man is speaking and a helicopter is flying overhead "], "question": "which machine is running continuously", "label": 0}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "a man speaks as a motor runs in the background"], "sample_ids": ["smDKStoHBJo", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["a, infant, speaking", "background, motor, run"], "captions_pred_video": ["a man holding a crying baby in his arms", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["siJFXfGWgDk", "sQGXqGcwOTc"], "start_seconds": ["50", "3"], "properties": ["man, woman, vehicle", "cling, speak, dishes"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "mechanisms are operating and water is splashing "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a person uses a saw to cut some wood", "someone snores nearby"], "sample_ids": ["sHbXC6na9hg", "spJCm8tD9Zo"], "start_seconds": ["0", "90"], "properties": ["a person, saw, wood", "someone snores, nearby, someone"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "of a man laying on the ground with his mouth open"], 
"captions_pred_audio": ["an engine is idling and vibrating", "a person is snoring loudly"], "question": "which entity is a person", "label": 0}, {"captions": ["a man is snoring loudly and repeatedly", "wind blowing followed by a zoom"], "sample_ids": ["sncRqQ67iJU", "vr8ZXjEBhMQ"], "start_seconds": ["460", "150"], "properties": ["loud, repeatedly, man", "wind, blow, zoom"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a person is snoring", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["s4Uz1Ffgo04", "zj2R0XoFr5k"], "start_seconds": ["100", "50"], "properties": ["water, rushes, motorcycle", "airplane, boy, fly"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a car accelerates and wind blows"], "sample_ids": ["xjhAnI2q6hM", "u0TrcHhkPQ"], "start_seconds": ["6", "20"], "properties": ["engine revs, vehicle, people", "accelerates, wind, blows"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", null], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a door opens and closes", "waves crash against a shoreline and people speak"], "sample_ids": ["vBHyYJ8pL0", 
"yFB25fqfU8I"], "start_seconds": ["2", "300"], "properties": ["open, close, door", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be a door", "label": 0}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a woman speaks as she rubs two objects together"], "sample_ids": ["tQWGZLItBXk", "vzxHnu-SFEw"], "start_seconds": ["170", "80"], "properties": ["voice, music, whoosh", "two objects, woman, speak"], "captions_pred_video": ["worms revolution screenshots", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a vehicle engine 
accelerating then running on idle"], "sample_ids": ["vSeGhaZt-aI", "vYkA3cfXp5Q"], "start_seconds": ["50", "30"], "properties": ["water, sink, talk", "engine, accelerate, idle"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "an engine is idling"], "question": "which is a vehicle", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "a stream of water runs briefly"], "sample_ids": ["yDoT73BWsdA", "x-PeY8Yb8M4"], "start_seconds": ["10", "300"], "properties": ["engine revs, tires squeal, vehicle", "stream, water, run"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["y2bVZ7rz-5M", "wDVMhEdTiVw"], "start_seconds": ["280", "30"], "properties": ["engine, horn, siren", "gun, shoot, water"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a gun", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "a toilet flushes and a female speaks"], "sample_ids": ["uZesmtKZGSw", "yaln9y8I7ms"], "start_seconds": ["250", "230"], "properties": ["car, track, man", "female, flushes, toilet"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet 
holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a toilet flushes and a man speaks"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["some clanking with distant murmuring", "a propeller rotates loudly and intensely"], "sample_ids": ["uMTTDZ2mb4", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["clanking, murmuring, distant", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "a clock ticktocks"], "sample_ids": ["vJ7JPEFhyLA", "v-g-j2uTByM"], "start_seconds": ["16", "30"], "properties": ["three men, wind, flow", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks as a machine runs", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vD6lYD1l0BY", "vb1fPSDI4c"], "start_seconds": ["330", "30"], "properties": ["a, machine, 
run", "multiple, people, yell"], "captions_pred_video": ["game controller being held in the hands of the person", null], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a cat meows as a young woman speaks"], "sample_ids": ["wTjoRj1se3U", "x5cuQjOdM3E"], "start_seconds": ["390", "30"], "properties": ["engine, run, people", "cat, meows, young woman"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a jet engine is running and people are talking", "a cat meows and a woman speaks"], "question": "which entity is more likely to be a pet", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "a car speeding up in the distance"], "sample_ids": ["vVhthZ45k3Y", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["cat, purr, hiss", "distance, car, speed"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a person is whistling a tune", "someone is typing on a computer keyboard"], "sample_ids": ["scYRUkrFLiQ", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["a, tune, whistle", "keyboard, type, computer"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "how to make money on youtube in spanish"], "captions_pred_audio": ["a person whistling a song", "a person is typing on a keyboard"], "question": "which is not a musical instrument", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "two women and a man talk while a kid cries"], "sample_ids": 
["sHbXC6na9hg", "wyllXV6PjKo"], "start_seconds": ["0", "30"], "properties": ["a person, saw, wood", "a kid, talk, cry"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", null], "captions_pred_audio": ["an engine is idling and vibrating", "a woman speaks and a baby cries"], "question": "which entity is about a person cutting wood?", "label": 0}, {"captions": ["a motor idles, accelerates, then slows down.", "paper is crumpling consistently"], "sample_ids": ["vYkA3cfXp5Q", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["speed, idle, accelerate", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["an engine is idling", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "vehicles pass by on a roadway"], "sample_ids": ["vSeGhaZt-aI", "tgbONvsP47Y"], "start_seconds": ["50", "0"], "properties": ["water, bubbles, run", "pass, vehicle, roadway"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a car is driving on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a door opens and birds chirp", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["yeFvk9x0wWI", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["door, open, birds", "music, gunfire, explosion"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "music plays while a woman speaks and 
gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "a woman speaks as she rubs two objects together"], "sample_ids": ["sapQIQUhFc", "vzxHnu-SFEw"], "start_seconds": ["280", "80"], "properties": ["liquid, flow, distance", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is more likely to be a video of a woman speaking?", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a car speeding up in the distance"], "sample_ids": ["ugHJF0hfYkg", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["engine, running, continuously", "distance, car, speed"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a race car accelerates and revs its engine "], "question": 
"which entity is moving faster", "label": 1}, {"captions": ["a duck quacks continuously", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vh30P49Po6s", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["quacks, continuously, duck", "a woman, something, fried"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a goat bleats as a person speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tPJvjq9QePY", "uZesmtKZGSw"], "start_seconds": ["40", "250"], "properties": ["bleats, person, speak", "men, talk, cars"], "captions_pred_video": ["a dog and a sheep in a barn", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a baby cries and a man speaks", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a child and woman laughs and the woman speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["uPDn2BFTHk", "wwyfGO2J4"], "start_seconds": ["140", "90"], "properties": ["woman, laughs, speaks", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", 
"people are clapping and speaking with background noise "], "question": "which entity shows more people", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["wqN6IIHw3po", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["rain, surface, fall", "two men, woman, birds"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", null], "captions_pred_audio": ["a man is speaking and water is splashing", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a heavy rain falls endlessly", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wP8ZKrlx3oA", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["heavy, rain, fall", "female, spraying, scream"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman and man are speaking", "someone sprays a liquid onto a hard surface making a hiss sound"], "sample_ids": ["vbpKkWvfOu4", "zO-LSSY92ZM"], "start_seconds": ["560", "30"], "properties": ["two people, speaking, woman, man", "liquid, surface, sound"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to 
clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "steam is hissing and hissing"], "question": "which entity is not a person", "label": 1}, {"captions": ["a rumble grows louder", "a man speaks as a car is passing by"], "sample_ids": ["y4MY9mp8-TA", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["loudness, increase, rumble", "a, car, pass"], "captions_pred_video": ["a helicopter flying in the sky", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a helicopter flies overhead ", "a man is speaking with background noise and breathing sounds "], "question": "which is not a rumble", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "some men converse over an engine running"], "sample_ids": ["yRx9txMcBl0", "sCiy7QS1U"], "start_seconds": ["40", "300"], "properties": ["accelerates, tires, 
squeals", "men, converse, engine"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", null], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a diesel truck engine runs continuously", "a man speaks as a car is passing by"], "sample_ids": ["sZvwOuuPGP0", "sK4u5T8hW78"], "start_seconds": ["50", "30"], "properties": ["engine, diesel, truck", "a, car, pass"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a medium engine is running ", "a man is speaking with background noise and breathing sounds "], "question": "which is not a vehicle", "label": 1}, {"captions": ["children speak and play together", "a telephone rings followed by a woman talking"], "sample_ids": ["yVVP8XvWJTo", "tGcFnX0GHI"], "start_seconds": ["260", "0"], "properties": ["children, speak, play", "ring, talk, woman"], 
"captions_pred_video": ["footage of a playground at a school or daycare center", null], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["paper is crumpling consistently", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["v5cSxLaHADY", "vfYTJq7nU"], "start_seconds": ["0", "130"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "rustling, ducks, quack"], "captions_pred_video": ["footage of the person holding a pair of scissors", null], "captions_pred_audio": ["paper is crumpled and crinkled", "a duck quacks and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["uqFtmnhuqA8", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["a, b, c", "beeps, hit, woman"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a person speaks briefly", "some men talk 
among st themselves as cars speed and race loudly"], "sample_ids": ["zOZleIRqZm4", "uZesmtKZGSw"], "start_seconds": ["80", "250"], "properties": ["person, talk, brief", "men, talk, cars"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["someone is snoring while sleeping", "a helicopter engine idles continuously"], "sample_ids": ["ujMt0-D-x2k", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["snore, sleep, someone", "engine, idle, continuously"], "captions_pred_video": ["of the dog playing with a toy on the floor", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a person is snoring loudly", "a helicopter is flying overhead "], "question": "which entity is not a person", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vbr9mHKc8WM", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["noise, loudness, engine", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["an engine is idling", "a woman is speaking and a baby 
is crying"], "question": "which entity is silent", "label": 0}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "two men speak as a buffeting wind blows"], "sample_ids": ["x5cuQjOdM3E", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["cat, talk, meow", "wind, speak, buffeting"], "captions_pred_video": ["a black background with an airplane flying in the sky", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["slZLHwNbbt4", "uYT5gxnyMWM"], "start_seconds": ["300", "50"], "properties": ["clap, distance, horn", "female, spraying, scream"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a motorcycle engine is idling", "a man speaks as a motor runs in the background"], "sample_ids": ["vZAqdHZ81yA", "xZepNM9qcRA"], "start_seconds": ["180", "30"], "properties": ["engine, motorcycle, idling", "background, motor, run"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["an engine is idling loudly", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a duck quacks several times", "some men talk among st themselves as cars speed and race 
loudly"], "sample_ids": ["vh30P49Po6s", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["quacks, duck, several", "men, talk, cars"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a duck is quacking loudly", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man talks while a clock does ticktock", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["spYNpeN7rPY", "y8WEcpOlT3I"], "start_seconds": ["1", "40"], "properties": ["a clock, ticktock, man", "harsh, wind, blows"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is about a clock?", "label": 0}, {"captions": ["a woman speaks over sizzling noise", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["yajyRTUQk3U", "tDlysoZiA1I"], "start_seconds": ["400", "0"], "properties": ["noise, woman, speak", "animal, grunts, chirps"], "captions_pred_video": ["- a woman cooking in the kitchen", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a person snores loudly multiple times at a close distance"], "sample_ids": ["w34HjHr6gAY", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["beeps, hit, woman", "loud, 
multiple, distance"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "a man speaks while water trickles and flows"], "sample_ids": ["wqZ135Ssz0", "sapQIQUhFc"], "start_seconds": ["60", "280"], "properties": ["two men, woman, birds", "water, trickles, flow"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a man is speaking and a stream is flowing in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a man speaks as a motor runs in the background"], "sample_ids": ["wy1eKjR7KC0", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["people, talk, distance", "background, motor, run"], "captions_pred_video": ["two police officers riding motorcycles down the street", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "people applaud and hoot and chat quietly"], "sample_ids": 
["sQwlkXjQabo", "wwyfGO2J4"], "start_seconds": ["10", "90"], "properties": ["liquid, surface, spray", "people, applaud, hoot"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", null], "captions_pred_audio": ["spraying followed by silence", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be a performance", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["x5cuQjOdM3E", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["cat, meows, young woman", "a woman, something, fried"], "captions_pred_video": ["a black background with an airplane flying in the sky", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman is speaking while food is frying in the background"], "question": "which entity is about a cat?", "label": 0}, {"captions": ["birds chirp and objects are moved around", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yPUYU6t3rwo", "uEU-Hg5MTN8"], "start_seconds": ["370", "27"], "properties": ["birds chirp, objects are moved around, birds", "a woman, laughs, animal"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["insects buzz and a man speaks", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a video of a person speaking and laughing?", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a loud engine muffles a man as he speaks"], "sample_ids": ["vZAw4apG0Es", "xyx6eNVEYRY"], "start_seconds": ["30", "380"], "properties": ["background, tick, repeat", "loud, engine, muffles"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a helicopter landing 
on a runway at an airport"], "captions_pred_audio": ["a clock is ticking and people are talking", "an aircraft engine is running and a man is speaking "], "question": "which entity is muffled", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "after a few seconds of silence, a loud bang occurs followed by a softer banging noise"], "sample_ids": ["weDbePuc-Xc", "zkKdxzNC97Y"], "start_seconds": ["40", "27"], "properties": ["cartoon character, music, vocalize", "loud, bang, noise"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a door is opened and closed"], "question": "which entity is more quiet", "label": 1}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "paper folding and crinkling"], "sample_ids": ["wP8ZKrlx3oA", "zPpG3RD8lSs"], "start_seconds": ["40", "20"], "properties": ["rain, storm, thunder", "paper, fold, crinkle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a 
piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a heavy rain is falling on a surface", "the wind blows and a mouse clicks "], "question": "which entity is not a natural phenomenon", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "water pouring and bubbling"], "sample_ids": ["spYNpeN7rPY", "uyRfq-jKPpo"], "start_seconds": ["1", "50"], "properties": ["a clock, ticktock, man", "water, bubbles, pouring"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how 
to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["xERFUeZONz8", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["ring, approach, traffic", "music, gunfire, explosion"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["an emergency vehicle siren blares", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["a goat bleats as a person speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tPJvjq9QePY", "vb1fPSDI4c"], "start_seconds": ["40", "30"], "properties": ["bleats, person, speak", "multiple, people, yell"], "captions_pred_video": ["a dog and a sheep in a barn", null], "captions_pred_audio": ["a baby cries and a man speaks", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "small dogs yip and bark sharply"], "sample_ids": ["xV7Mg1QucSc", "v-wcQf4BDY0"], "start_seconds": ["14", "120"], "properties": ["alarm, ticktocks, laughs", "bark, yip, sharply"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": 
["an alarm clock ticks and a woman laughs", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a duck quacks continuously", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vh30P49Po6s", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["quacks, continuously, duck", "loud, jet engine, roar"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a duck is quacking loudly", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a power tool runs and touches a surface", "a horn rings out as a machine runs by"], "sample_ids": ["zfvPRf3chY", "slZLHwNbbt4"], "start_seconds": ["290", "300"], "properties": ["power tool, run, touch", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man yells and speaks as water splashes", "a stream of water flows as people talk and wind blows"], "sample_ids": ["vimzuGQvdcU", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["a, man, yells", "stream, water, flow"], "captions_pred_video": ["a group of people are rafting down a river", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while water is splashing and a child is speaking ", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a man speaks as a car is passing by"], "sample_ids": ["xSKJGCItUWE", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["engine, run, boy", 
"a, car, pass"], "captions_pred_video": ["footage of the helicopter flying in the room", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a boy speaking?", "label": 0}, {"captions": ["a man speaks as a machine runs", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vD6lYD1l0BY", "su6FAOcOA8c"], "start_seconds": ["330", "4"], "properties": ["a, machine, run", "engine, idle, woman"], "captions_pred_video": ["game controller being held in the hands of the person", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["paper folding and crinkling", "winds blows roughly as a vehicle races past"], "sample_ids": ["zPpG3RD8lSs", "xjvTpk2Zpr8"], "start_seconds": ["20", "70"], "properties": ["paper, fold, crinkle", "wind, blows, vehicle"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece 
of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["dishes cling together then a man begins to speak", "some tunes played by whistling"], "sample_ids": ["sQGXqGcwOTc", "u6BnG6YZqJ4"], "start_seconds": ["3", "0"], "properties": ["cling, speak, dishes", "tune, play, whistling"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["an aircraft engine runs", "a car accelerates and wind blows"], "sample_ids": ["yLCORCnd35Q", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["engine, aircraft, runs", "accelerates, wind, blows"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", null], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "wind blowing followed by a zoom"], "sample_ids": ["yLy-WycbVVE", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["background, people, talk", "wind, blow, zoom"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "is taken 
from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "a car speeding up in the distance"], "sample_ids": ["xyL9F5VrjkE", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["wind, motor, distance", "distance, car, speed"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a race car accelerates and revs its engine "], "question": "which object is moving faster", "label": 0}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["y8WEcpOlT3I", "sQGXqGcwOTc"], "start_seconds": ["40", "3"], "properties": ["harsh, wind, blows", "cling, speak, dishes"], "captions_pred_video": ["on how to use a sewing machine youtube", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["wind noise makes sound into a microphone", "roadway noise occurs and a truck accelerates"], "sample_ids": ["w8uLijTqtlU", "tgbONvsP47Y"], "start_seconds": ["70", "0"], "properties": ["wind, microphone, noise", "noise, truck, accelerate"], "captions_pred_video": ["footage is blurry and shaky", "footage of a fire truck entering a garage"], "captions_pred_audio": ["the wind is blowing strongly", "a car is driving on the road "], "question": "which noise is caused by a truck", "label": 1}, {"captions": ["material crumbles into a microphone", "an airplane engine runs"], "sample_ids": ["vofpvUo6NAw", "yVPZ2MNWpms"], 
"start_seconds": ["220", "0"], "properties": ["material, crumbles, microphone", "engine, airplane, runs"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a person whistles and clicks a mouse", "a small engine spits as it runs"], "sample_ids": ["zCrAfDfv6-A", "sZvwOuuPGP0"], "start_seconds": ["30", "50"], "properties": ["person, mouse, click", "spits, engine, runs"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", "of a bulldozer clearing a road in a forest stock footage and royalty-free videos"], "captions_pred_audio": ["a person whistles a song", "a medium engine is running "], "question": "which entity is not a person?", "label": 1}, {"captions": ["birds chirp then an animal grunts", "people applaud and hoot and chat quietly"], "sample_ids": ["tDlysoZiA1I", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["animal, grunt, chirp", "people, applaud, hoot"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", null], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "vehicle engines race around a track as a man commentates"], "sample_ids": ["w1mlz3Pe4fU", "sZPuqDgX2V0"], "start_seconds": ["300", "30"], "properties": ["vocalize, chirp, continuously", "commentator, race, track"], "captions_pred_video": ["of a bird in a cage", null], "captions_pred_audio": ["birds are chirping and singing", "a man is speaking and a helicopter is flying overhead "], "question": "which entity is a human activity", "label": 1}, {"captions": ["a young 
female speaks, followed by spraying and a female screaming", "some men converse over an engine running"], "sample_ids": ["uYT5gxnyMWM", "sCiy7QS1U"], "start_seconds": ["50", "300"], "properties": ["female, spraying, scream", "men, converse, engine"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more calm", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "wind blowing followed by a zoom"], "sample_ids": ["s4Uz1Ffgo04", "vr8ZXjEBhMQ"], "start_seconds": ["100", "150"], "properties": ["water, rushes, motorcycle", "wind, blow, zoom"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sd7xVssqlw", "zl9Dqx-j7q4"], "start_seconds": ["50", "6"], "properties": ["accelerates, tires, squealing", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a jet engine roars "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a man speaks as a machine runs", "paper is crumpling consistently"], "sample_ids": ["vD6lYD1l0BY", "v5cSxLaHADY"], "start_seconds": ["330", "0"], "properties": ["a, machine, run", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a 
man is speaking and dishes are being washed ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "water splashes and a motorboat passes as people yell"], "sample_ids": ["yeFvk9x0wWI", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["chirp, twitter, clatter", "water, splashes, motorboat"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", null], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is more active", "label": 1}, {"captions": ["a motorcycle engine is idling", "a car accelerates and wind blows"], "sample_ids": ["vZAqdHZ81yA", "u0TrcHhkPQ"], "start_seconds": ["180", "20"], "properties": ["engine, motorcycle, idling", "accelerates, wind, blows"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", null], "captions_pred_audio": ["an engine is idling loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a vehicle accelerates and squeals tires"], "sample_ids": ["ugHJF0hfYkg", "yRx9txMcBl0"], "start_seconds": ["10", "40"], "properties": ["engine, running, continuously", "accelerates, tires, squeals"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 
5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car is revving its engine and skidding "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["yNtRmrn0io8", "tiDFTC-5vU"], "start_seconds": ["210", "30"], "properties": ["storm, distance, strike", "male, duck, laugh"], "captions_pred_video": ["footage of a house in the middle of the night", null], "captions_pred_audio": ["rain falls and thunder roars", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["children cry and people talk", "a toilet flushes and a female speaks"], "sample_ids": ["xLwHe825Zs", "yaln9y8I7ms"], "start_seconds": ["18", "230"], "properties": ["people talk, children cry, people talk", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a baby cries and a woman speaks", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "a child speaks in closed space"], "sample_ids": ["w-4gHptFNuU", "yW6FWLSLkx4"], "start_seconds": ["21", "40"], "properties": ["engine revs, accelerates, bump", "child, space, speak"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a child and woman laughs and the woman speaks"], "sample_ids": 
["sQwlkXjQabo", "uPDn2BFTHk"], "start_seconds": ["10", "140"], "properties": ["water, spray, surface", "woman, laughs, speaks"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", null], "captions_pred_audio": ["spraying followed by silence", "a baby laughs and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "several insects fly while two men talk"], "sample_ids": ["w0xsN8X18Y", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["music, surface, rain", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more insects", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "a car speeding up in the distance"], "sample_ids": ["yks4cLgIDMc", "u0TrcHhkPQ"], "start_seconds": ["170", "20"], "properties": ["background, speaking, child", "distance, car, speed"], "captions_pred_video": ["footage of two kids wrestling on the floor", null], "captions_pred_audio": ["a man is speaking and a child is crying", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "a woman and man speak while food is frying"], "sample_ids": ["wTideSjRFS0", "zk-xJGQU8-4"], "start_seconds": ["30", "130"], "properties": ["food, sizzle, woman", "food, man, woman"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a woman is speaking while dishes are clanging and music is playing in the 
background "], "question": "which entity has a man speaking while food is frying?", "label": 1}, {"captions": ["a male speaks over some small clicks", "people speak as gunfire rings out"], "sample_ids": ["uXxVebHsGZ8", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["male, clicks, speak", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sSMl2vc3ek", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["a person, laughs, snores", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person?", "label": 0}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "a car speeding up in the distance"], "sample_ids": ["ukg5L09Wpvo", "u0TrcHhkPQ"], "start_seconds": ["150", "20"], "properties": ["clickety-clack, train, whistle", "distance, car, speed"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", null], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wqZ135Ssz0", "tdWhHV3X25Q"], "start_seconds": ["60", "60"], "properties": ["man, woman, squawks", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to 
another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vlS6YMeWAPo", "su6FAOcOA8c"], "start_seconds": ["40", "4"], "properties": ["noise, bleat, call", "engine, idle, woman"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a goat bleats and birds chirp", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "people applaud and hoot and chat quietly"], "sample_ids": ["slZLHwNbbt4", "wwyfGO2J4"], "start_seconds": ["300", "90"], "properties": ["clap, distance, horn", "people, applaud, hoot"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", null], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yRx9txMcBl0", "tDVADusiIoc"], "start_seconds": ["40", "60"], "properties": ["accelerates, tires, squeals", "water, radio, man"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 
5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "some men converse over an engine running"], "sample_ids": ["vlS6YMeWAPo", "sCiy7QS1U"], "start_seconds": ["40", "300"], "properties": ["sheep, baa, birds", "men, converse, engine"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", null], "captions_pred_audio": ["a goat bleats and birds chirp", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation?", "label": 1}, {"captions": ["a toilet flushes and water drains", "some tunes played by whistling"], "sample_ids": ["sfAvvZwdLCY", "u6BnG6YZqJ4"], "start_seconds": ["20", "0"], "properties": ["water drains, flushes, water", "tune, play, whistling"], "captions_pred_video": ["footage of the toilet in the bathroom", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a toilet is flushed", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a telephone rings followed by a woman talking"], "sample_ids": ["tQWGZLItBXk", "tGcFnX0GHI"], "start_seconds": ["170", "0"], "properties": ["music, kid, speak", "ring, talk, woman"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a 
dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["goats bleat and people speak", "people applaud and hoot and chat quietly"], "sample_ids": ["z5iUE5h0EPs", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["goats bleat, people speak, language", "people, applaud, hoot"], "captions_pred_video": ["of the goat in the barn", null], "captions_pred_audio": ["a goat bleats and a man speaks", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "an insect buzzes around continuously"], "sample_ids": ["w5W5Kqtc8E", "v25l1jef3JY"], "start_seconds": ["100", "0"], "properties": ["wind, blow, vehicle", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "wind blows as people chatter quietly"], "sample_ids": ["sapQIQUhFc", "xBxDz0CFVn0"], "start_seconds": ["280", "30"], "properties": ["liquid, flow, distance", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a machine beeps continuously"], "sample_ids": ["ugHJF0hfYkg", "y682ml90jGw"], "start_seconds": ["10", "11"], "properties": ["engine, running, continuously", "beeps, machine, continuously"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", 
null], "captions_pred_audio": ["a helicopter is flying overhead ", "a beeping sound is being made "], "question": "which machine beeps continuously", "label": 1}, {"captions": ["a propeller moves loudly nearby", "an engine runs loudly"], "sample_ids": ["ugHJF0hfYkg", "vqZuVbG6-HI"], "start_seconds": ["10", "130"], "properties": ["loud, propeller, move", "loud, engine, run"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a helicopter is flying overhead ", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "water splashes as an animal walks through"], "sample_ids": ["tDVADusiIoc", "w1ir-sZ3Im8"], "start_seconds": ["60", "90"], "properties": ["man, radio, blows", "animal, water, splashes"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a stream of water runs briefly"], "sample_ids": ["wqZ135Ssz0", "x-PeY8Yb8M4"], "start_seconds": ["60", "300"], "properties": ["man, woman, squawks", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "roadway noise occurs and a truck accelerates"], "sample_ids": ["w5W5Kqtc8E", "tgbONvsP47Y"], "start_seconds": ["100", "0"], "properties": ["wind, 
blow, vehicle", "noise, truck, accelerate"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["zdYdyF9-m8U", "su6FAOcOA8c"], "start_seconds": ["7", "4"], "properties": ["wind, crash, shoreline", "engine, idle, woman"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["waves crash and wind blows ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sAam2NqGhLY", "tiDFTC-5vU"], "start_seconds": ["20", "30"], "properties": ["snoring, breathing, child", "male, duck, laugh"], "captions_pred_video": ["of a little girl sleeping on a couch", null], "captions_pred_audio": ["a person is snoring", "a man is speaking and ducks are quacking"], "question": "which entity is a person", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wDVMhEdTiVw", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["gun, shoot, water", "stream, water, flow"], "captions_pred_video": ["a blurry image of trees and water in the forest", "footage is blurry and out of focus"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing 
while men speak", "a machine beeps continuously"], "sample_ids": ["wsHBIgzs9Fs", "y682ml90jGw"], "start_seconds": ["50", "11"], "properties": ["horn, continuous, buzzing", "beeps, machine, continuously"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", null], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a beeping sound is being made "], "question": "which entity is continuous", "label": 1}, {"captions": ["wind blowing followed by a zoom", "small dogs yip and bark sharply"], "sample_ids": ["vr8ZXjEBhMQ", "v-wcQf4BDY0"], "start_seconds": ["150", "120"], "properties": ["wind, blow, zoom", "bark, yip, sharply"], "captions_pred_video": ["is taken from a motorcycle's point of view", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a cat meows and children speak", "a person snoring several times"], "sample_ids": ["x5cuQjOdM3E", "spJCm8tD9Zo"], "start_seconds": ["30", "90"], "properties": ["cat, speak, children", "snore, person, several"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a cat meows and a woman speaks", "a person is snoring loudly"], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "people cheer as a vehicle engine revs"], "sample_ids": ["zO-LSSY92ZM", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["liquid, surface, sound", "engine revs, vehicle, people"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter 
youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["steam is hissing and hissing", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["w6RTHR6AeAg", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["call, owl, screech", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "a duck quacks continuously"], "sample_ids": ["xjhAnI2q6hM", "vh30P49Po6s"], "start_seconds": ["6", "30"], "properties": ["wind, blow, loudly", "quacks, continuously, duck"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a truck is revving its engine and a 
man is speaking ", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "people speak softly as food sizzles"], "sample_ids": ["sU53zg9Jp7s", "yhQ2Lg-7qDY"], "start_seconds": ["380", "130"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "food, sizzle, speak"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a faucet is running and a man is speaking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["s4Uz1Ffgo04", "wz7N8YRy74I"], "start_seconds": ["100", "30"], "properties": ["water, rushes, motorcycle", "rooster, crow, background, men"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["w9lpbUn0hPc", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["male, wind, rustling", "rooster, crow, background, men"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is 
speaking with wind noise and breathing sounds in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "a woman speaks and other women and a man talk with her"], "sample_ids": ["se87d6yxEOA", "vbpKkWvfOu4"], "start_seconds": ["10", "560"], "properties": ["run, whistle, pass", "a, woman, man"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "a man speaks over a running engine and blowing wind"], "sample_ids": ["zl9Dqx-j7q4", "ylpYOorfH4o"], "start_seconds": ["6", "410"], "properties": ["engine, laugh, loud", "engine, running, wind"], "captions_pred_video": ["footage of a man driving a car in the dark", "for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15"], 
"captions_pred_audio": ["a jet engine roars ", "a man is speaking and an engine is revving"], "question": "which entity is a man speaking over a running engine and blowing wind?", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["slZLHwNbbt4", "yDoT73BWsdA"], "start_seconds": ["300", "10"], "properties": ["train, horn, sound", "engine, revs, vehicle"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["people speak and tapping occurs", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["tFCUUGdREgA", "ziUT9IFTkjg"], "start_seconds": ["70", "10"], "properties": ["people, tap, speak", "background, birds, rustling"], "captions_pred_video": ["a person riding a white horse in an indoor arena", null], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "wind blows and a vehicle blows a hard then a train blows a horn"], "sample_ids": ["xyL9F5VrjkE", "wnpJndXuxLc"], "start_seconds": ["20", "50"], "properties": ["wind, motor, distance", "blows, vehicle, train"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is about a train blowing a horn?", "label": 
1}, {"captions": ["some clanking with distant murmuring", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["uMTTDZ2mb4", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["clanking, murmuring, distant", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more abrasive", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["uC9dtII1KDI", "tdWhHV3X25Q"], "start_seconds": ["150", "60"], "properties": ["wind, gusts, distance", "applause, audience, yells"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a human activity", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "an infant crying as a woman laughs"], "sample_ids": ["vbpKkWvfOu4", "xhmRY9yhC7c"], "start_seconds": ["560", "20"], "properties": ["a, woman, man", "a, laugh, infant"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a baby cries and a woman speaks"], "question": "which woman is laughing", "label": 1}, {"captions": ["someone whistles a tune", "a man speaks over 
a radio as wind blows and water splashes"], "sample_ids": ["sIXTftIuUgw", "tDVADusiIoc"], "start_seconds": ["90", "60"], "properties": ["someone, tune, whistle", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a person whistling a song", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yJ0TePmaOo", "wz7N8YRy74I"], "start_seconds": ["390", "30"], "properties": ["two hard objects, man, speak", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "a toilet flushes and a female speaks"], "sample_ids": ["zhx6hoYrHeI", "yaln9y8I7ms"], "start_seconds": ["160", "230"], "properties": ["engine, sputter, rough", "female, flushes, toilet"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "footage is blurry and out of focus"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "a telephone rings followed by a woman talking"], "sample_ids": ["w0xsN8X18Y", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["music, surface, rain", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a 
dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a man speaks as crickets sing", "water runs into a sink while men speak"], "sample_ids": ["ryFDPxgDOGc", "vzceMbklWc"], "start_seconds": ["570", "180"], "properties": ["a, crickets, sing", "water, sink, run"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "water is running and a man is speaking"], "question": "which entity is a man speaking as crickets sing?", "label": 0}, {"captions": ["an engine works in idle nearby followed by a man talking", "a man speaks as a car is passing by"], "sample_ids": ["wqADXCzngMw", "sK4u5T8hW78"], "start_seconds": ["340", "30"], "properties": ["engine, idle, man", "a, car, pass"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["wSVhSdj0F0", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["beep, clang, footsteps", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": 
["a car horn honks and keys jangle with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be heard in a car", "label": 0}, {"captions": ["a man speaks, then dials a rotary telephone", "an airplane engine spools and people speak"], "sample_ids": ["tK4VlLsNxak", "wTjoRj1se3U"], "start_seconds": ["120", "390"], "properties": ["a, dial, telephone", "airplane, engine, spool"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a jet engine is running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tIY7qOV3rEM", "vb1fPSDI4c"], "start_seconds": ["0", "30"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "multiple, people, yell"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a crowd of people are talking and laughing"], "question": "which entity is a human activity", "label": 1}, {"captions": ["a toilet flushes and water drains", "a small engine idles continuously"], "sample_ids": ["sfAvvZwdLCY", "y5WII6cTH7k"], "start_seconds": ["20", "40"], "properties": ["water drains, flushes, water", "engine, idle, continuously"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a sewing machine stitching a red and white hat"], "captions_pred_audio": ["a toilet is flushed", "an engine is knocking and vibrating "], "question": "which entity is not a machine?", "label": 0}, {"captions": ["a man talks as something metal hits against and glass is set down", "wind blowing followed by a zoom"], 
"sample_ids": ["x6ijhqRY38s", "vr8ZXjEBhMQ"], "start_seconds": ["250", "150"], "properties": ["something metal, glass, hit", "wind, blow, zoom"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "small dogs yip and bark sharply"], "sample_ids": ["w5W5Kqtc8E", "v-wcQf4BDY0"], "start_seconds": ["100", "120"], "properties": ["wind, blow, vehicle", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "vehicles pass by on a roadway"], "sample_ids": ["uYT5gxnyMWM", "tgbONvsP47Y"], "start_seconds": ["50", "0"], "properties": ["person, spray, yell", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a car is driving on the road "], "question": "which entity is more passive", "label": 1}, {"captions": ["a kid speaks followed by music playing", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["tQWGZLItBXk", "vbZ-0lGPneg"], "start_seconds": ["170", "30"], "properties": ["music, kid, speak", "a woman, a television program, a bird"], "captions_pred_video": ["worms revolution screenshots", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a child speaks music plays video game sounds 
sound effects and sound effects play ", "a woman is speaking and a dog is whimpering"], "question": "which entity has more birds", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "people speak as gunfire rings out"], "sample_ids": ["w5W5Kqtc8E", "wqTCwqVRDlk"], "start_seconds": ["100", "80"], "properties": ["wind, engine, scream", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking and a gun is fired"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["viuTg1M-dqg", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["two men, speak, follow", "a woman, something, fried"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["xl2PIWyXaM", "vzxHnu-SFEw"], "start_seconds": ["160", "80"], "properties": ["chirp, man, younger person", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to 
make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["birds are chirping and people are talking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vlJS7LN2XyM", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["background, clocks, ticking", "a woman, a television program, a bird"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a ticktock of a clock", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program in the background?", "label": 1}, {"captions": ["someone is burping continuously", "water is sprayed across a hard surface"], "sample_ids": ["y636gklDioE", "sQwlkXjQabo"], "start_seconds": ["20", "10"], "properties": ["burps, burps, burps", "water, spray, surface"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a person burps loudly several times", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a door opens and birds chirp", "water pouring 
and bubbling"], "sample_ids": ["yeFvk9x0wWI", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["door, open, birds", "water, bubbles, pouring"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "water is running from a faucet"], "question": "which entity is more likely to be in a bathroom", "label": 1}, {"captions": ["a child speaks", "water splashes as an animal walks through"], "sample_ids": ["yW6FWLSLkx4", "w1ir-sZ3Im8"], "start_seconds": ["40", "90"], "properties": ["a, child, speaks", "animal, water, splashes"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a man talks as several small engines run", "a vehicles accelerate quickly and someone laughs"], "sample_ids": ["u9A6VZQCZpU", "uWPRNLnpy7Y"], "start_seconds": ["30", "10"], "properties": ["a, man, talk", "accelerate, laugh, vehicle"], 
"captions_pred_video": [null, "is taken from a car driving down the street"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a car accelerates and revs its engine "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["zgUgkpk78xU", "uYT5gxnyMWM"], "start_seconds": ["70", "50"], "properties": ["horn, bells, ring", "female, spraying, scream"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman is speaking and a baby is crying"], "question": "which entity is a warning", "label": 0}, {"captions": ["a man speaks while playing a video game on a keyboard", "people applaud and hoot and chat quietly"], "sample_ids": ["tw76HGONaKg", "wwyfGO2J4"], "start_seconds": ["570", "90"], "properties": ["A, game, keyboard", "people, applaud, hoot"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of 
time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "a person snores loudly multiple times at a close distance"], "sample_ids": ["rwtmaKiCcQU", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["nozzle, depressed, spray can", "loud, multiple, distance"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", null], "captions_pred_audio": ["spraying and people speaking", "a person snoring loudly"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a sleeping person snores and wheezes", "people speak as gunfire rings out"], "sample_ids": ["spJCm8tD9Zo", "wqTCwqVRDlk"], "start_seconds": ["90", "80"], "properties": ["snores, wheezes, sleeps", "gunfire, ring, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a gun is fired"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "paper is crumpling consistently"], "sample_ids": ["sapQIQUhFc", "v5cSxLaHADY"], "start_seconds": ["280", "0"], "properties": ["liquid, flow, distance", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 
1}, {"captions": ["multiple people speak and children yell while water gurgles", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vb1fPSDI4c", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["multiple, people, yell", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a crowd of people are talking and laughing", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 0}, {"captions": ["small dogs growl, bark and yip.", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sShpyu2l4YQ", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["growl, bark, yip", "airplane, boy, fly"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a dog is barking and growling", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "three men talk while wind blows and some liquid flows"], "sample_ids": ["uZesmtKZGSw", "vJ7JPEFhyLA"], "start_seconds": ["250", "16"], "properties": ["car, track, man", "three men, wind, flow"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "a man in a red shirt paddling a kayak in the water"], 
"captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about cars going around a track?", "label": 0}, {"captions": ["an adult man speaks over glass clinking", "water splashes as an animal walks through"], "sample_ids": ["u6jIvCtKarQ", "w1ir-sZ3Im8"], "start_seconds": ["70", "90"], "properties": ["a, man, speaks", "animal, water, splashes"], "captions_pred_video": ["footage of a person using a blender on a stove top", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["w2M4i1mklOA", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["alarm, gears, turn", "gun, shoot, water"], "captions_pred_video": ["footage of an antique clock", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a small engine spits as it runs", "a infant makes noise and is excited"], "sample_ids": ["sZvwOuuPGP0", "wIJK3-5y0kA"], "start_seconds": ["50", "30"], "properties": ["spits, engine, runs", "noise, excited, infant"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a medium engine is running ", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "paper 
is crumpling consistently"], "sample_ids": ["xyL9F5VrjkE", "v5cSxLaHADY"], "start_seconds": ["20", "0"], "properties": ["wind, motor, distance", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a car speeding up in the distance"], "sample_ids": ["vddP56-ogds", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["water, splash, person, laugh", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["un9VQlzgZM", "t97k0cejSQE"], "start_seconds": ["5", "250"], "properties": ["females, talk, laugh", "sound, chirp, buzz"], "captions_pred_video": [null, "a bee on a purple thistle flower"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a bee buzzes and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a small engine idles continuously", "an infant crying as a woman laughs"], "sample_ids": ["y5WII6cTH7k", "xhmRY9yhC7c"], "start_seconds": ["40", "20"], "properties": ["engine, idle, continuously", "a, laugh, infant"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a baby cries and a woman speaks"], "question": "which entity is 
not a person?", "label": 0}, {"captions": ["some clanking with distant murmuring", "winds blows roughly as a vehicle races past"], "sample_ids": ["uMTTDZ2mb4", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["clanking, murmuring, distant", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["a person is snoring while sleeping", "wind blows as people chatter quietly"], "sample_ids": ["vJrjSeP17yE", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["a person is sleeping, snoring, person", "wind, chatter, people"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage is blurry and out of focus"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "dishes cling together then a man begins to speak"], "sample_ids": ["vhJWZheqaE", "sQGXqGcwOTc"], "start_seconds": ["0", "3"], "properties": ["water drains unevenly, toilet flushes, water drains", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a toilet is flushed", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a toilet flushing and water draining unevenly?", "label": 0}, {"captions": ["children speak as a female ask them questions", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wEBlkGWVWwE", "vJ7JPEFhyLA"], "start_seconds": ["260", "16"], "properties": ["female, speak, questions", "three men, wind, flow"], "captions_pred_video": ["shows a person writing on the 
whiteboard", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a female speaking to children?", "label": 0}, {"captions": ["water gurgles, metal squeaks and the water stops", "some tunes played by whistling"], "sample_ids": ["x4a9YGIw4ok", "u6BnG6YZqJ4"], "start_seconds": ["120", "0"], "properties": ["water, gurgles, stops", "tune, play, whistling"], "captions_pred_video": ["footage is blurry and out of focus", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a toilet flushes and water splashes", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "water runs into a sink while men speak"], "sample_ids": ["zcDwZ6W7E3E", "vzceMbklWc"], "start_seconds": ["180", "180"], "properties": ["a, man, speak", "water, sink, run"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "water is running and a man is speaking"], "question": "which entity is about water running into a sink?", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["yajyRTUQk3U", "y8WEcpOlT3I"], "start_seconds": ["400", "40"], "properties": ["noise, woman, speak", "harsh, wind, blows"], "captions_pred_video": ["- a woman cooking in the kitchen", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking with wind noise in the background "], "question": "which entity is a man speaking over a harsh wind?", 
"label": 1}, {"captions": ["a helicopter engine runs continuously", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["ugHJF0hfYkg", "w5W5Kqtc8E"], "start_seconds": ["10", "100"], "properties": ["engine, running, continuously", "wind, blow, vehicle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a running engine", "label": 1}, {"captions": ["the revving of an engine throttle followed by a man speaking", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tezvROoo4bs", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["audio, throttle, speaking", "female, spraying, scream"], "captions_pred_video": ["footage of a busy city street with cars parked on both sides of the road", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["an aircraft engine runs", "an insect buzzes around continuously"], "sample_ids": ["yLCORCnd35Q", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["engine, aircraft, runs", "buzzes, continuously, insect"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vqZuVbG6-HI", "wDVMhEdTiVw"], "start_seconds": ["130", "30"], 
"properties": ["background, male, female", "gun, shoot, water"], "captions_pred_video": ["footage is blurry because it's raining outside", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["music plays followed by gunshots and then an explosion", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["xKB8O8LTs6s", "uYT5gxnyMWM"], "start_seconds": ["70", "50"], "properties": ["music, gunshots, explosion", "female, spraying, scream"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a woman is speaking and a baby is crying"], "question": "which entity is more calm", "label": 1}, {"captions": ["a helicopter engine idles continuously", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["ugHJF0hfYkg", "yDoT73BWsdA"], "start_seconds": ["10", "10"], "properties": ["engine, idle, continuously", "engine, revs, vehicle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a helicopter is flying overhead ", "a race car accelerates and revs its engine "], "question": "which engine is revving", "label": 1}, {"captions": ["sawing of wood and rustling with leaves blowing in the distance", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["uiItxDsDMFI", "t97k0cejSQE"], "start_seconds": ["30", "250"], "properties": ["sound, distance, leaves", "sound, chirp, buzz"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "a bee on a purple thistle flower"], "captions_pred_audio": ["a saw is being 
used with background noise ", "a bee buzzes and a woman speaks"], "question": "which entity has a higher pitch", "label": 1}, {"captions": ["a person sniffs and sneezes", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["uRlbY6aoBU", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["sneezes, person, sniffs", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is sneezing ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a door slams shut roughly", "a toilet flushes and water drains"], "sample_ids": ["zkKdxzNC97Y", "sfAvvZwdLCY"], "start_seconds": ["27", "20"], "properties": ["a door, slams, shut", "water drains, flushes, water"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a door is opened and closed", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "a vehicle engine accelerating then running on idle"], "sample_ids": ["se87d6yxEOA", "vYkA3cfXp5Q"], "start_seconds": ["10", "30"], "properties": ["run, whistle, pass", "engine, accelerate, idle"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "an engine is idling"], "question": "which engine is running on idle", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["vYkA3cfXp5Q", "wyllXV6PjKo"], "start_seconds": ["30", "30"], "properties": ["speed, idle, accelerate", "a baby, a woman, a man"], "captions_pred_video": ["footage of a car driving down the street 
on a sunny day", null], "captions_pred_audio": ["an engine is idling", "a woman speaks and a baby cries"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "people applaud and hoot and chat quietly"], "sample_ids": ["tw76HGONaKg", "wwyfGO2J4"], "start_seconds": ["570", "90"], "properties": ["music, click, man", "people, applaud, hoot"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "people are clapping and speaking with background noise "], "question": "which entity has more people", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "metal clacking as food and oil sizzles followed by a woman talking"], "sample_ids": ["sTpirNYo8vQ", "vW4x7S1VfQc"], "start_seconds": ["30", "150"], "properties": ["a, tone, fast", "clacking, oil, woman"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage of a person cooking fish in a frying pan on a stove top"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "food sizzles in a frying pan"], "question": "which 
entity is about cooking?", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "a man woman speak while crickets sing"], "sample_ids": ["xvDdE3zNf8Y", "zTLVJCo4WEE"], "start_seconds": ["120", "30"], "properties": ["a, female, speaks", "a, crickets, sing"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "- a boy with a rifle aiming at a target"], "captions_pred_audio": ["a woman speaks and crumples paper", "a woman speaks and crickets chirp"], "question": "which entity has a female speaking softly as paper crinkles?", "label": 0}, {"captions": ["an adult woman and an adult man speak", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zTLVJCo4WEE", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["two people, adult, speak", "multiple, people, yell"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and crickets chirp", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a toilet flushes and a female speaks"], "sample_ids": ["vlJS7LN2XyM", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["background, clocks, ticking", "female, flushes, toilet"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage is blurry and out of focus"], "captions_pred_audio": ["a ticktock of a clock", "a toilet flushes and a man speaks"], "question": "which entity is accompanied by a female speaking?", "label": 1}, {"captions": ["water pouring and bubbling", "someone is typing on a computer keyboard"], "sample_ids": ["uyRfq-jKPpo", "v0x1odnXtP0"], "start_seconds": ["50", "210"], "properties": ["water, bubbles, pouring", "keyboard, type, computer"], 
"captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "how to make money on youtube in spanish"], "captions_pred_audio": ["water is running from a faucet", "a person is typing on a keyboard"], "question": "which is not a source of bubbles", "label": 1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["zkKdxzNC97Y", "wSVhSdj0F0"], "start_seconds": ["27", "10"], "properties": ["loud, bang, noise", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["footage of the door opening and closing in slow motion", null], "captions_pred_audio": ["a door is opened and closed", "a car horn honks and keys jangle with background noise "], "question": "which entity is softer", "label": 1}, {"captions": ["a consistent ticking pattern", "loud ringing of a telephone stops followed by a man speaking and a digital beep"], "sample_ids": ["sCeWURVHfOM", "uzQnlJXBbOM"], "start_seconds": ["30", "50"], "properties": ["ticking, pattern, clock", "ringing, beep, stop"], "captions_pred_video": ["- a close-up view of the clock's inner workings", "footage of a person using a cell phone on a table"], "captions_pred_audio": ["ticking of a clock", "a 
telephone rings and a man speaks"], "question": "which entity is a clock", "label": 0}, {"captions": ["a person snores hilariously while someone laughs", "someone snores nearby"], "sample_ids": ["sSMl2vc3ek", "spJCm8tD9Zo"], "start_seconds": ["20", "90"], "properties": ["a person, laughs, snores", "someone snores, nearby, someone"], "captions_pred_video": [null, "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a person snoring loudly", "a person is snoring loudly"], "question": "which entity is a person?", "label": 0}, {"captions": ["a car speeding up in the distance", "a telephone rings followed by a woman talking"], "sample_ids": ["u0TrcHhkPQ", "tGcFnX0GHI"], "start_seconds": ["20", "0"], "properties": ["distance, car, speed", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a motorcycle engine is idling", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["vZAqdHZ81yA", "wSVhSdj0F0"], "start_seconds": ["180", "10"], "properties": ["engine, motorcycle, idling", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", null], "captions_pred_audio": ["an engine is idling loudly", "a car horn honks and keys jangle with background noise "], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a person sniffs and sneezes", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["uRlbY6aoBU", "uZesmtKZGSw"], "start_seconds": ["0", "250"], "properties": ["sneezes, person, sniffs", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["birds chirp and pigeons vocalize while walking around", "a car accelerates and wind blows"], "sample_ids": ["wIvYjuR3nrg", "u0TrcHhkPQ"], "start_seconds": ["9", "20"], "properties": ["birds, pigeons, vocalize", "accelerates, wind, blows"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", null], "captions_pred_audio": ["birds are chirping and cooing", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks with water running", "a cat meows and children speak"], "sample_ids": ["wTideSjRFS0", "x5cuQjOdM3E"], "start_seconds": ["30", "30"], "properties": ["water, running, woman", "cat, speak, children"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a black background with an airplane flying in the sky"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a cat meows and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["an electronic device bleeps once", "a man speaks as insects buzz and a bird chirps"], "sample_ids": ["tHJ6JSa8Y4", "t25U-v4k4ts"], "start_seconds": ["0", "40"], "properties": ["bleeps, electronic, device", "a, chirps, bird"], "captions_pred_video": [null, "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": 
["a clock is ticking and beeping", "a man is speaking and bees are buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a woman speaks in a fast tone with a male"], "sample_ids": ["xZepNM9qcRA", "sTpirNYo8vQ"], "start_seconds": ["30", "30"], "properties": ["background, motor, run", "a, tone, fast"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "of a man taking a selfie on a bus"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a man is speaking while a car is revving and accelerating "], "question": "which entity is a man speaking?", "label": 0}, {"captions": ["a man talks as something metal hits against and glass is set down", "an engine runs loudly"], "sample_ids": ["x6ijhqRY38s", "vqZuVbG6-HI"], "start_seconds": ["250", "130"], "properties": ["something metal, glass, hit", "loud, engine, run"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a person sniffles and sneezes", "a man speaks, another man speaks, and a small bell dings"], "sample_ids": ["uRlbY6aoBU", "t69a8aRKhmc"], "start_seconds": ["0", "30"], "properties": ["sneezes, sniffles, person", "a, b, c"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is sneezing ", "a man is speaking and birds are chirping in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["multiple insects buzz over rustling wind", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tMJne1a4AFI", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["wind, buzz, rustling", 
"a woman, something, fried"], "captions_pred_video": ["a swarm of bees on the ground", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a swarm of bees buzzing around", "a woman is speaking while food is frying in the background"], "question": "which entity is a video", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["vSeGhaZt-aI", "xjvTpk2Zpr8"], "start_seconds": ["50", "70"], "properties": ["water, bubbles, speak", "wind, blows, vehicle"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a calm environment", "label": 0}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "a toilet flushes and a female speaks"], "sample_ids": ["vuUVPzd2FXw", "yaln9y8I7ms"], "start_seconds": ["160", "230"], "properties": ["a, steam, release", "female, flushes, toilet"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a toilet flushes and a man speaks"], "question": "which entity is a video of a person speaking?", "label": 1}, {"captions": ["a horse runs while two women talk", "people cheer as a vehicle engine revs"], "sample_ids": ["sdvI1mHAsc", "xjhAnI2q6hM"], "start_seconds": ["20", "6"], "properties": ["two women, horse, run", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["an 
infant crying frantically", "a man speaks followed by another man speaking outside"], "sample_ids": ["zwOBqeFTgiU", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["cry, infant, frantically", "two men, speak, follow"], "captions_pred_video": ["of the baby crying in the car seat", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a baby cries loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a human", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "a infant makes noise and is excited"], "sample_ids": ["vYkA3cfXp5Q", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["engine, accelerate, idle", "noise, excited, infant"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["an engine is idling", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["wind blows strongly", "dishes cling together then a man begins to speak"], "sample_ids": ["w8uLijTqtlU", "sQGXqGcwOTc"], "start_seconds": ["70", "3"], "properties": ["wind, blows, strongly", "cling, speak, dishes"], "captions_pred_video": ["footage is blurry and shaky", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["the wind is blowing strongly", "mechanisms are operating and water is splashing "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a dog barks and whimpers", "some clanking with distant murmuring"], "sample_ids": ["sShpyu2l4YQ", "uMTTDZ2mb4"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "clanking, murmuring, distant"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "people are talking and a car is driving by with wind noise in the background "], "question": 
"which entity is quieter", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vlJS7LN2XyM", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["background, clocks, ticking", "clickety-clack, train, whistle"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a ticktock of a clock", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["an animal quacks rapidly", "someone whistles a tune"], "sample_ids": ["vh30P49Po6s", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["animal, quacks, rapidly", "someone, tune, whistle"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a person whistling a song"], "question": "which entity is a human", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "white noise and snoring with some rustling in the background"], "sample_ids": ["zl9Dqx-j7q4", "xzKKf9bKNUo"], "start_seconds": ["6", "10"], "properties": ["engine, laugh, loud", "background, noise, snoring"], "captions_pred_video": ["footage of a man driving a car in the dark", "shows a woman laying on a bed with her eyes closed and her mouth open"], "captions_pred_audio": ["a jet engine roars ", "a person snoring loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a car accelerates and wind blows"], "sample_ids": ["vXlk0lIQBFo", "u0TrcHhkPQ"], "start_seconds": ["470", "20"], "properties": ["wind, talk, vocalize", 
"accelerates, wind, blows"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", null], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "an infant crying frantically"], "sample_ids": ["x5cuQjOdM3E", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["cat, talk, meow", "cry, infant, frantically"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of the baby crying in the car seat"], "captions_pred_audio": ["a cat meows and a woman speaks", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a man speaks as a car is passing by"], "sample_ids": ["sLUnaPT5gM8", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["loud, laughter, intermittent", "a, car, pass"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking to a car passing by?", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "an infant crying frantically"], "sample_ids": ["vfYTJq7nU", "zwOBqeFTgiU"], "start_seconds": ["130", "30"], "properties": 
["ducks, quack, man", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tOj4tdLRaA", "w5W5Kqtc8E"], "start_seconds": ["70", "100"], "properties": ["woman, laugh, baby", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a baby laughing?", "label": 0}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["uqFtmnhuqA8", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["a, b, c", "gun, shoot, water"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a movie", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ukxt9I7eMMg", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["food, pan, cook", "female, spraying, scream"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as 
a car is passing by", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sK4u5T8hW78", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["a, car, pass", "male, duck, laugh"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "a car speeding up in the distance"], "sample_ids": ["yZrFNS7GFBQ", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["pigeon, buzzes, insect", "distance, car, speed"], "captions_pred_video": ["of the bird in the cage", null], "captions_pred_audio": ["an owl hoots in the background ", "a race car accelerates and revs its engine "], "question": "which object is moving faster", "label": 0}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a child and woman laughs and the woman speaks"], "sample_ids": ["ziUT9IFTkjg", "uPDn2BFTHk"], "start_seconds": ["10", "140"], "properties": ["background, birds, rustling", "woman, laughs, speaks"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a baby laughs and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["food is frying then a woman speaks", "some men converse over an engine running"], "sample_ids": ["ukxt9I7eMMg", "sCiy7QS1U"], "start_seconds": ["30", "300"], 
"properties": ["food, woman, speak", "men, converse, engine"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a person whistles and clicks a mouse", "a car speeding up in the distance"], "sample_ids": ["zCrAfDfv6-A", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["person, mouse, click", "distance, car, speed"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", null], "captions_pred_audio": ["a person whistles a song", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["people speak in a closed space", "a telephone rings followed by a woman talking"], "sample_ids": ["sTpirNYo8vQ", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["people, space, speak", "ring, talk, woman"], "captions_pred_video": ["of a man taking a selfie on a bus", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a car speeding up in the distance"], "sample_ids": ["t25U-v4k4ts", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["a, chirps, bird", "distance, car, speed"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", null], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["small dogs yip and bark sharply", "someone is typing on a computer keyboard"], "sample_ids": ["v-wcQf4BDY0", 
"v0x1odnXtP0"], "start_seconds": ["120", "210"], "properties": ["bark, yip, sharply", "keyboard, type, computer"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "how to make money on youtube in spanish"], "captions_pred_audio": ["a dog barks and growls", "a person is typing on a keyboard"], "question": "which entity is typing", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["yYEVLuqEytU", "su6FAOcOA8c"], "start_seconds": ["40", "4"], "properties": ["grunt, slurp, background", "engine, idle, woman"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "a child speaks in closed space"], "sample_ids": ["uiItxDsDMFI", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["wood, piece, saw", "child, space, speak"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a saw is being used with background noise ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is not being sawed", "label": 1}, {"captions": ["a small engine idles continuously", "several insects fly while two men talk"], "sample_ids": ["y5WII6cTH7k", "s-T9OVOiMLo"], "start_seconds": ["40", "330"], "properties": ["engine, idle, continuously", "several, fly, men"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "a man climbing up a tree using a rope to reach the top of the tree"], 
"captions_pred_audio": ["an engine is knocking and vibrating ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is not stationary", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vzxHnu-SFEw", "uYT5gxnyMWM"], "start_seconds": ["80", "50"], "properties": ["two objects, woman, speak", "a, scream, girl"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a train horn sounds as it passes by", "a child speaks in closed space"], "sample_ids": ["ukg5L09Wpvo", "yW6FWLSLkx4"], "start_seconds": ["150", "40"], "properties": ["sound, train, horn", "child, space, speak"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "of the doll sitting on the bed with her hand outstretched 
towards the viewer"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["xBxDz0CFVn0", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["wind, chatter, people", "female, spraying, scream"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["an airplane engine spools and people speak", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["wTjoRj1se3U", "w5W5Kqtc8E"], "start_seconds": ["390", "100"], "properties": ["airplane, engine, spool", "wind, blow, vehicle"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", null], "captions_pred_audio": ["a jet engine is running and people are talking", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sG7TyPnFDR0", "uZesmtKZGSw"], "start_seconds": ["180", "250"], "properties": ["beeps, machine, smoke alarm", "men, talk, cars"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "a woman speaks as she rubs two objects together"], "sample_ids": ["uiS58TNyUiw", "vzxHnu-SFEw"], "start_seconds": ["430", "80"], "properties": ["audio, man, speaking", "two objects, woman, speak"], "captions_pred_video": ["of the pigeon in the cage", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a helicopter engine runs", "a small airplane approaches and then flies by, after and during which a boy 
speaks"], "sample_ids": ["t5ZbXbniOWk", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["engine, helicopter, run", "airplane, boy, fly"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "pigeons vocalize and birds chirp"], "sample_ids": ["wsHBIgzs9Fs", "uiS58TNyUiw"], "start_seconds": ["50", "430"], "properties": ["horn, continuous, buzzing", "vocalize, bird, chirp"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "of the pigeon in the cage"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "a clock ticktocks"], "sample_ids": ["zl9Dqx-j7q4", "v-g-j2uTByM"], "start_seconds": ["6", "30"], "properties": ["engine, laugh, loud", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a man driving a car in the dark", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a jet engine roars ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "pigeons vocalize and birds chirp"], "sample_ids": ["sAam2NqGhLY", "uiS58TNyUiw"], "start_seconds": ["20", "430"], "properties": ["snoring, breathing, child", "vocalize, bird, chirp"], "captions_pred_video": ["of a little girl sleeping on a couch", "of the pigeon in the cage"], "captions_pred_audio": ["a person is snoring", "a man 
is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["dogs barking and whimpering", "water pouring and bubbling"], "sample_ids": ["tIY7qOV3rEM", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["barking, whimpering, dog", "water, bubbles, pouring"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["children speak and play together", "a vehicle engine accelerating then running on idle"], "sample_ids": ["yVVP8XvWJTo", "vYkA3cfXp5Q"], "start_seconds": ["260", "30"], "properties": ["children, speak, play", "engine, accelerate, idle"], "captions_pred_video": ["footage of a playground at a school or daycare center", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "an engine is idling"], "question": "which entity is a vehicle", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "birds chirp in the background while a horse neighs followed by a girl speaking"], 
"sample_ids": ["sNB8zxXneIM", "s59PfAghdkM"], "start_seconds": ["20", "0"], "properties": ["several, quack, cocks", "bird, chirp, background, horse, neigh"], "captions_pred_video": ["a group of geese in a cage", "is an anime scene featuring two people and a horse in the foreground and a fence in the background"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "birds are chirping a horse is neighing and a woman is speaking "], "question": "which entity has a horse?", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "birds coo incessantly"], "sample_ids": ["y2ZBGpgbhHM", "yZrFNS7GFBQ"], "start_seconds": ["30", "30"], "properties": ["birds, tweet, pant", "coo, bird, incessant"], "captions_pred_video": [null, "of the bird in the cage"], "captions_pred_audio": ["birds chirping and a dog panting", "an owl hoots in the background "], "question": "which bird is incessant", "label": 1}, {"captions": ["a church bell rings several times", "water is sprayed across a hard surface"], "sample_ids": ["sUVVjE3Ucp8", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["ring, bell, several", "water, spray, surface"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a church bell is ringing ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "dishes cling together then a man begins to speak"], "sample_ids": ["voJh2gJxXhA", "sQGXqGcwOTc"], "start_seconds": ["50", "3"], "properties": ["music, frog, croak", "cling, speak, dishes"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["music is playing and crickets are chirping ", 
"mechanisms are operating and water is splashing "], "question": "which entity is about a frog?", "label": 0}, {"captions": ["a flush is followed by gurgling water, then another flush", "a infant makes noise and is excited"], "sample_ids": ["tqR406bGiE", "wIJK3-5y0kA"], "start_seconds": ["40", "30"], "properties": ["flush, water, gurgle", "noise, excited, infant"], "captions_pred_video": [null, "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a toilet is flushed", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wyllXV6PjKo", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["a kid, talk, cry", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["ugHJF0hfYkg", "uEU-Hg5MTN8"], "start_seconds": ["10", "27"], "properties": ["loud, intense, propeller", "animal, grunts, snorts"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a small musical boom and then birds tweet and a few dogs pant"], "sample_ids": ["yLy-WycbVVE", "y2ZBGpgbhHM"], "start_seconds": ["30", "30"], "properties": ["background, people, talk", "birds, tweet, 
pant"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", null], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "birds chirping and a dog panting"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a baby cries and a woman moans", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["smDKStoHBJo", "sapQIQUhFc"], "start_seconds": ["0", "280"], "properties": ["a, cry, woman", "liquid, flow, distance"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking and a stream is flowing in the background "], "question": "which entity is more distant", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["xfaoyyzw2WU", "tDlysoZiA1I"], "start_seconds": ["180", "0"], "properties": ["loud, jet engine, roar", "animal, grunts, chirps"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "birds are chirping and a rooster is crowing "], "question": "which entity is quieter", "label": 1}, {"captions": ["male speech with light ticking", "a stream of water runs briefly"], "sample_ids": ["xO-Q2BlIIPU", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["male, speech, ticking", "stream, water, run"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["water flows 
followed by women screaming", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["w5W5Kqtc8E", "ziUT9IFTkjg"], "start_seconds": ["100", "10"], "properties": ["water, flow, women", "background, birds, rustling"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["water splashes as an animal walks through", "some men converse over an engine running"], "sample_ids": ["w1ir-sZ3Im8", "sCiy7QS1U"], "start_seconds": ["90", "300"], "properties": ["animal, water, splashes", "men, converse, engine"], "captions_pred_video": ["footage of a group of people riding horses through a river", null], "captions_pred_audio": ["water splashes and gurgles as people speak", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["multiple people speak then an emergency vehicle siren sounds", "water splashes as an animal walks through"], "sample_ids": ["wy1eKjR7KC0", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["audio, sound, siren", "animal, water, splashes"], "captions_pred_video": ["two police officers riding motorcycles down the street", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and a siren is going off", "water splashes and gurgles as people speak"], "question": "which entity is a video", "label": 1}, {"captions": ["a large crowd cheers and applauds", "water flows and trickles"], "sample_ids": ["rqfQRErjfk8", "tB7hWb9gTuQ"], "start_seconds": ["170", "30"], "properties": ["crowd, cheers, applauds", "water, flow, trickle"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "the rocks on the beach are surrounded by water and the sky is visible in the background"], 
"captions_pred_audio": ["a crowd of people clapping and cheering", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "water flows as men speak and yell"], "sample_ids": ["tZGN5a7ybxo", "vJ7JPEFhyLA"], "start_seconds": ["60", "16"], "properties": ["ring, train, horn", "water, flow, men"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a train horn blowing?", "label": 0}, {"captions": ["race cars go around a track as a man commentates", "paper is crumpling consistently"], "sample_ids": ["uZesmtKZGSw", "v5cSxLaHADY"], "start_seconds": ["250", "0"], "properties": ["car, track, man", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "water runs into a sink while men speak"], "sample_ids": ["weDbePuc-Xc", 
"vzceMbklWc"], "start_seconds": ["40", "180"], "properties": ["cartoon character, music, vocalize", "water, sink, run"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", null], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "water is running and a man is speaking"], "question": "which entity is a video of a sink?", "label": 1}, {"captions": ["a vehicle engine revs as the vehicle passes", "an airplane engine runs"], "sample_ids": ["yDoT73BWsdA", "yVPZ2MNWpms"], "start_seconds": ["10", "0"], "properties": ["engine, revs, vehicle", "engine, airplane, runs"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a car is driving by on the road "], "question": "which entity has a running engine", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "an engine runs loudly"], "sample_ids": ["uJV8NDaHqqk", "vqZuVbG6-HI"], "start_seconds": ["100", "130"], "properties": ["loud, fly, chirp", "loud, engine, run"], "captions_pred_video": ["a bee hive in a wooden box", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a swarm of bees buzzing around", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as a machine runs", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vD6lYD1l0BY", "uEU-Hg5MTN8"], "start_seconds": ["330", "27"], "properties": ["a, machine, run", "a woman, laughs, animal"], "captions_pred_video": ["game controller being held in the hands of the person", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a woman is speaking and a baby is crying"], "question": "which 
entity has a man speaking as a machine runs?", "label": 0}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wy1eKjR7KC0", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["people, talk, distance", "rooster, crow, background, men"], "captions_pred_video": ["two police officers riding motorcycles down the street", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "a toilet flushes and water drains unevenly"], "sample_ids": ["zl9Dqx-j7q4", "vhJWZheqaE"], "start_seconds": ["6", "0"], "properties": ["engine, laugh, loud", "water drains unevenly, toilet flushes, water drains"], "captions_pred_video": ["footage of a man driving a car in the dark", null], "captions_pred_audio": ["a jet engine roars ", "a toilet is flushed"], "question": "which entity is a toilet?", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a weapon fires multiple times"], "sample_ids": ["wqZ135Ssz0", "sMC07Ucy7kg"], "start_seconds": ["60", "10"], "properties": ["man, woman, squawks", "weapon, fire, multiple"], "captions_pred_video": [null, "footage is from a car's point of view"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["s4Uz1Ffgo04", "y2bVZ7rz-5M"], "start_seconds": 
["100", "280"], "properties": ["roars, background, people speaking", "motor noise, horn, siren"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "paper is crumpling consistently"], "sample_ids": ["s59PfAghdkM", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["bird, chirp, background, horse, neigh", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "paper is crumpled and crinkled"], "question": "which entity is more quiet", "label": 1}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "a man speaks as a motor runs in the background"], "sample_ids": ["vfYTJq7nU", "xZepNM9qcRA"], "start_seconds": ["130", "30"], "properties": ["rustling, ducks, quack", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["wqZ135Ssz0", "xV7Mg1QucSc"], "start_seconds": 
["60", "14"], "properties": ["man, woman, squawks", "alarm, ticktocks, laughs"], "captions_pred_video": [null, "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "an alarm clock ticks and a woman laughs"], "question": "which entity is accompanied by a man speaking", "label": 0}, {"captions": ["an infant crying frantically", "a motor noise is accompanied by a door opening and closing"], "sample_ids": ["zwOBqeFTgiU", "vBHyYJ8pL0"], "start_seconds": ["30", "2"], "properties": ["cry, infant, frantically", "noise, door, opening"], "captions_pred_video": ["of the baby crying in the car seat", null], "captions_pred_audio": ["a baby cries loudly", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is not a person", "label": 1}, {"captions": ["loud, continuous burping", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["y636gklDioE", "y2bVZ7rz-5M"], "start_seconds": ["20", "280"], "properties": ["loud, continuous, burping", "motor noise, horn, siren"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a person burps loudly several times", "a truck is honking its horn and a siren is blaring "], "question": "which is louder", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "vehicles pass by on a roadway"], "sample_ids": ["sEprKHm8Sj8", "tgbONvsP47Y"], "start_seconds": ["90", "0"], "properties": ["car, tires, slows", "pass, vehicle, roadway"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage of a fire truck 
entering a garage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a car is driving on the road "], "question": "which vehicle is moving faster", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sYITalLZjj4", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["water, rushes, background, birds", "female, spraying, scream"], "captions_pred_video": ["two ducks are swimming in the water near each other", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["wind blows and birds chirp", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a speedboat passes quickly on the water"], "sample_ids": ["y2ZBGpgbhHM", "tjmoSi330GM"], "start_seconds": ["30", "23"], "properties": ["birds, tweet, pant", "speed, water, boat"], "captions_pred_video": [null, "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["birds chirping and a dog panting", "a motorboat speeds through water with wind noise "], "question": "which entity is moving faster", "label": 1}, {"captions": ["water rushes by", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["x-PeY8Yb8M4", "wDVMhEdTiVw"], "start_seconds": ["300", "30"], "properties": ["water, rushes, by", "gun, shoot, water"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a car is driving on a wet road ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a video of water moving?", "label": 0}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "a child speaks in closed space"], "sample_ids": ["yZrFNS7GFBQ", "yW6FWLSLkx4"], 
"start_seconds": ["30", "40"], "properties": ["pigeon, buzzes, insect", "child, space, speak"], "captions_pred_video": ["of the bird in the cage", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["an owl hoots in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "water splashes as an animal walks through"], "sample_ids": ["weDbePuc-Xc", "w1ir-sZ3Im8"], "start_seconds": ["40", "90"], "properties": ["music, slaps, human", "animal, water, splashes"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be a video of a person playing music?", "label": 0}, {"captions": ["a horn rings out as a machine runs by", "a train horn blows as it passes by"], "sample_ids": ["slZLHwNbbt4", "zVacuqSb4LI"], "start_seconds": ["300", "30"], "properties": ["a, horn, run", "horn, blows, train"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by 
mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a toilet flushes and water drains"], "sample_ids": ["sfAvvZwdLCY", "sfAvvZwdLCY"], "start_seconds": ["20", "20"], "properties": ["flushes, drains, water", "water drains, flushes, water"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a toilet is flushed", "a toilet is flushed"], "question": "which entity has more water", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "a man speaks as a motor runs in the background"], "sample_ids": ["vddP56-ogds", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["liquid, laughs, man", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "a man speaks as a motor runs in the background"], "sample_ids": ["t25U-v4k4ts", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["bees buzz, birds chirp, man speaks", "background, motor, run"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a man speaks while a motorcycle revs and accelerates "], "question": 
"which entity is more quiet", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "an insect buzzes around continuously"], "sample_ids": ["vSeGhaZt-aI", "v25l1jef3JY"], "start_seconds": ["50", "0"], "properties": ["water, bubbles, run", "buzzes, continuously, insect"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "vehicles pass by on a roadway"], "sample_ids": ["yJ0TePmaOo", "tgbONvsP47Y"], "start_seconds": ["390", "0"], "properties": ["two hard objects, man, speak", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a infant makes noise and is excited"], "sample_ids": ["ylpYOorfH4o", "wIJK3-5y0kA"], "start_seconds": ["410", "30"], "properties": ["motor, run, steady", "noise, excited, infant"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to 
replace the fuel pump on a 1999 dodge ram 15", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "some men converse over an engine running"], "sample_ids": ["vddP56-ogds", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["water, splash, person, laugh", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a person speaking?", "label": 0}, {"captions": ["an adult speaks and is typing on a computer keyboard", "several insects fly while two men talk"], "sample_ids": ["x9JovgqUcs", "s-T9OVOiMLo"], "start_seconds": ["500", "330"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video of a person speaking and typing on a computer keyboard?", "label": 0}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["wP8ZKrlx3oA", "su6FAOcOA8c"], "start_seconds": ["40", "4"], "properties": ["rain, storm, thunder", "engine, idle, woman"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman is speaking and a 
subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "wind blows and a vehicle blows a hard then a train blows a horn"], "sample_ids": ["wPz6QRAkEb4", "wnpJndXuxLc"], "start_seconds": ["60", "50"], "properties": ["chirps, tweets, song", "blows, vehicle, train"], "captions_pred_video": ["a bird in a cage on top of a pole", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["birds are chirping in the background ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is not a bird?", "label": 1}, {"captions": ["frogs croak and vocalize", "a car speeding up in the distance"], "sample_ids": ["yswmmRZFItk", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["croak, vocalize, frog", "distance, car, speed"], "captions_pred_video": ["a close up of a frog in the water", null], "captions_pred_audio": ["a frog is croaking", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks as a machine runs", "an airplane engine spools and people speak"], "sample_ids": ["vD6lYD1l0BY", "wTjoRj1se3U"], "start_seconds": ["330", "390"], "properties": ["a, machine, run", "airplane, engine, spool"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a jet engine is running and people are talking"], "question": "which entity is a machine", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a person is snoring while sleeping"], "sample_ids": ["v7jJS8aAyA", "vJrjSeP17yE"], "start_seconds": ["10", "40"], "properties": ["wind, blows, loudly", "a person is sleeping, snoring, person"], "captions_pred_video": [null, "a black background with a small 
plane flying in the sky"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a person snoring loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["sirens ring and approach with humming of distant traffic", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["xERFUeZONz8", "wDVMhEdTiVw"], "start_seconds": ["0", "30"], "properties": ["ring, approach, traffic", "gun, shoot, water"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["an emergency vehicle siren blares", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["scraping and female speech with distant music", "a toilet flushes and a female speaks"], "sample_ids": ["yHeVV-xeOxQ", "yaln9y8I7ms"], "start_seconds": ["130", "230"], "properties": ["female, speech, music", "female, flushes, toilet"], "captions_pred_video": ["of a girl milking a goat's udder", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a toilet flushes and a man speaks"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "a man speaks as a car is passing by"], "sample_ids": ["soTOh3zYJfY", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["vehicle, skid, tires", "a, car, pass"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 
6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which vehicle is skidding and squealing tires", "label": 0}, {"captions": ["an electronic device bleeps once", "small dogs yip and bark sharply"], "sample_ids": ["tHJ6JSa8Y4", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["bleeps, electronic, device", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a clock is ticking and beeping", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["uEU-Hg5MTN8", "su6FAOcOA8c"], "start_seconds": ["27", "4"], "properties": ["a woman, laughs, animal", "engine, idle, woman"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a subway train is moving "], "question": "which woman is speaking", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "an airplane engine runs"], "sample_ids": ["w5W5Kqtc8E", "yVPZ2MNWpms"], "start_seconds": ["100", "0"], "properties": ["wind, blow, vehicle", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a car is driving by on the road "], "question": "which entity has a running engine", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "water splashes as an animal walks through"], 
"sample_ids": ["vW4x7S1VfQc", "w1ir-sZ3Im8"], "start_seconds": ["150", "90"], "properties": ["clacking, oil, woman", "animal, water, splashes"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["food sizzles in a frying pan", "water splashes and gurgles as people speak"], "question": "which entity is more likely to be a video of a person cooking?", "label": 0}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["wyllXV6PjKo", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["a baby, a woman, a man", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman speaks and a baby cries", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tQWGZLItBXk", "zj2R0XoFr5k"], "start_seconds": ["170", "50"], "properties": ["music, kid, speak", "airplane, boy, fly"], "captions_pred_video": ["worms revolution screenshots", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "vehicles pass by on a roadway"], "sample_ids": ["vBslzh7saPw", "tgbONvsP47Y"], "start_seconds": ["90", "0"], "properties": ["power, scream, increase", "pass, vehicle, roadway"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of a fire truck entering a garage"], 
"captions_pred_audio": ["a jet engine roars and accelerates ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a machine runs continuously", "small dogs yip and bark sharply"], "sample_ids": ["wdXV3Pv0jiY", "v-wcQf4BDY0"], "start_seconds": ["11", "120"], "properties": ["machine, running, continuously", "bark, yip, sharply"], "captions_pred_video": ["footage is blurry and shaky", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["an emergency siren wails as it passes", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vGj1XLJvNrw", "wDVMhEdTiVw"], "start_seconds": ["0", "30"], "properties": ["wails, wails, pass", "gun, shoot, water"], "captions_pred_video": ["footage of a police car driving down a city street", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not followed by water sloshing?", "label": 0}, {"captions": ["a motor idles, accelerates, then slows down.", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vYkA3cfXp5Q", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["speed, idle, accelerate", "female, spraying, scream"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["an engine is idling", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tDVADusiIoc", "zl9Dqx-j7q4"], 
"start_seconds": ["60", "6"], "properties": ["water, radio, man", "engine, laugh, loud"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a jet engine roars "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an animal quacks rapidly", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vh30P49Po6s", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["animal, quacks, rapidly", "a woman, a television program, a bird"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a duck is quacking loudly", "a woman is speaking and a dog is whimpering"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "a car speeding up in the distance"], "sample_ids": ["sSMl2vc3ek", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["a person, laughs, snores", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person snoring loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["x9JovgqUcs", "tdWhHV3X25Q"], "start_seconds": ["500", "60"], "properties": ["a, man, speaks, keyboard", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck 
sound", "people cheer as a vehicle engine revs"], "sample_ids": ["vfYTJq7nU", "xjhAnI2q6hM"], "start_seconds": ["130", "6"], "properties": ["ducks, quack, man", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["an engine runs loudly", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["vqZuVbG6-HI", "uEU-Hg5MTN8"], "start_seconds": ["130", "27"], "properties": ["loud, engine, run", "animal, grunts, snorts"], "captions_pred_video": ["footage is blurry because it's raining outside", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a woman is speaking and a baby is crying"], "question": "which entity is quieter", "label": 1}, {"captions": ["a motorcycle engine works nearby", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tOSWIURC-4", "vb1fPSDI4c"], "start_seconds": ["0", "30"], "properties": ["engine, work, nearby", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a lawn mower is running ", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["zdYdyF9-m8U", "zj2R0XoFr5k"], "start_seconds": ["7", "50"], "properties": ["wind, crash, shoreline", "airplane, boy, fly"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["waves crash and wind blows ", "a woman speaks while a helicopter 
flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a vehicle engine accelerating then running on idle"], "sample_ids": ["uWPRNLnpy7Y", "vYkA3cfXp5Q"], "start_seconds": ["10", "30"], "properties": ["accelerate, laugh, vehicle", "engine, accelerate, idle"], "captions_pred_video": ["is taken from a car driving down the street", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a car accelerates and revs its engine ", "an engine is idling"], "question": "which vehicle is accelerating quickly", "label": 0}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a vehicle engine revs and tires squeal"], "sample_ids": ["wnpJndXuxLc", "yDoT73BWsdA"], "start_seconds": ["50", "10"], "properties": ["blows, vehicle, train", "engine revs, tires squeal, vehicle"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which vehicle is moving", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["xSKJGCItUWE", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["engine, work, child", "stream, water, flow"], "captions_pred_video": ["footage of the helicopter flying in the room", "footage is blurry and out of focus"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "an audience gives applause as a man yells and a group sings"], "sample_ids": 
["xSKJGCItUWE", "tdWhHV3X25Q"], "start_seconds": ["10", "60"], "properties": ["engine, work, child", "applause, audience, yells"], "captions_pred_video": ["footage of the helicopter flying in the room", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yRx9txMcBl0", "vbZ-0lGPneg"], "start_seconds": ["40", "30"], "properties": ["accelerates, tires, squeals", "a woman, a television program, a bird"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "a stream of water runs briefly"], "sample_ids": ["y8WEcpOlT3I", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["harsh, wind, blows", "stream, water, run"], "captions_pred_video": ["on how to use a sewing machine youtube", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking with wind 
noise in the background ", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["ylpYOorfH4o", "tiDFTC-5vU"], "start_seconds": ["410", "30"], "properties": ["engine, running, wind", "male, duck, laugh"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking over?", "label": 0}, {"captions": ["an infant crying as a woman laughs", "some men converse over an engine running"], "sample_ids": ["xhmRY9yhC7c", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["a, laugh, infant", "men, converse, engine"], "captions_pred_video": ["of a baby crying in a baby bouncer", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a woman and an infant?", "label": 0}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "a horse runs while two women talk"], "sample_ids": ["ylpYOorfH4o", "sdvI1mHAsc"], "start_seconds": ["410", "20"], "properties": ["motor, run, steady", "two women, horse, run"], "captions_pred_video": ["for 
youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "horses clip-clop and a woman speaks"], "question": "which entity is moving", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "a woman speaks as she rubs two objects together"], "sample_ids": ["w0xsN8X18Y", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["rain, thunder, surface", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": 
["a man is speaking while a motorboat is moving in the background ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a process", "label": 1}, {"captions": ["a person sniffles and then sneezes in the distance", "paper is crumpling consistently"], "sample_ids": ["uRlbY6aoBU", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["a, distance, sneeze", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is sneezing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["ylpYOorfH4o", "uEU-Hg5MTN8"], "start_seconds": ["410", "27"], "properties": ["engine, run, loud", "a woman, laughs, animal"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a woman is speaking and a baby is crying"], "question": "which entity is more quiet", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "a large crowd cheers and applauds"], "sample_ids": ["uC9dtII1KDI", 
"rqfQRErjfk8"], "start_seconds": ["150", "170"], "properties": ["wind, gusts, distance", "crowd, cheers, applauds"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "a man hugging another man in front of an orchestra"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a crowd of people clapping and cheering"], "question": "which entity is more likely to be a natural phenomenon", "label": 0}, {"captions": ["a helicopter engine idles continuously", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["ugHJF0hfYkg", "xfaoyyzw2WU"], "start_seconds": ["10", "180"], "properties": ["engine, idle, continuously", "loud, jet engine, roar"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a helicopter is flying overhead ", "an aircraft engine roars and a man speaks "], "question": "which engine is louder", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "pigeons vocalize and birds chirp"], "sample_ids": ["sncRqQ67iJU", "uiS58TNyUiw"], "start_seconds": ["460", "430"], "properties": ["loud, repeatedly, man", "vocalize, bird, chirp"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "of the pigeon in the cage"], "captions_pred_audio": ["a person is snoring", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["someone whistles a tune", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sIXTftIuUgw", "wDVMhEdTiVw"], "start_seconds": ["90", "30"], "properties": ["someone, tune, whistle", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a person whistling a song", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be heard", 
"label": 0}, {"captions": ["multiple motorcycles pass by as a man speaks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["zcDwZ6W7E3E", "w5W5Kqtc8E"], "start_seconds": ["180", "100"], "properties": ["man, speak, motorcycles", "wind, blow, vehicle"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "dishes cling together then a man begins to speak"], "sample_ids": ["tw76HGONaKg", "sQGXqGcwOTc"], "start_seconds": ["570", "3"], "properties": ["A, game, keyboard", "cling, speak, dishes"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a person 
snores hilariously while someone laughs", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sSMl2vc3ek", "wqZ135Ssz0"], "start_seconds": ["20", "60"], "properties": ["a person, laughs, snores", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a person snoring loudly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a low rumbling in the distance followed by a motorcycle engine revving up", "a man speaks as a motor runs in the background"], "sample_ids": ["vr8ZXjEBhMQ", "xZepNM9qcRA"], "start_seconds": ["150", "30"], "properties": ["sound, distance, engine", "background, motor, run"], "captions_pred_video": ["is taken from a motorcycle's point of view", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["sjlVMgdGSK0", "tDlysoZiA1I"], "start_seconds": ["30", "0"], "properties": ["car, revving, loudly", "animal, grunts, chirps"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a car accelerates and revs its engine ", "birds are chirping and a rooster is crowing "], "question": "which entity is quieter", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zl9Dqx-j7q4", "tiDFTC-5vU"], "start_seconds": ["6", "30"], "properties": ["motors rev, laugh, loudly", "male, duck, laugh"], 
"captions_pred_video": ["footage of a man driving a car in the dark", null], "captions_pred_audio": ["a jet engine roars ", "a man is speaking and ducks are quacking"], "question": "which entity is a person?", "label": 0}, {"captions": ["a vehicle engine runs as a siren and horn sound", "a man speaks followed by another man speaking outside"], "sample_ids": ["u--KhUW8l1Y", "viuTg1M-dqg"], "start_seconds": ["0", "30"], "properties": ["sound, vehicle, horn", "two men, speak, follow"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "a mechanical buzzing getting louder"], "sample_ids": ["wwyfGO2J4", "sEprKHm8Sj8"], "start_seconds": ["90", "90"], "properties": ["people, applaud, hoot", "noise, loud, buzzing"], "captions_pred_video": [null, "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is quieter", "label": 0}, {"captions": ["someone sprays liquid onto a hard surface", "a man speaks as a car is passing by"], "sample_ids": ["sQwlkXjQabo", "sK4u5T8hW78"], "start_seconds": ["10", "30"], "properties": ["liquid, surface, spray", "a, car, pass"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "a duck quacks continuously"], "sample_ids": ["ujMt0-D-x2k", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["snoring, rhythmical, nearby", "quacks, continuously, duck"], "captions_pred_video": ["of the dog playing with a toy on the floor", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person is snoring loudly", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["t25U-v4k4ts", "wSVhSdj0F0"], "start_seconds": ["40", "10"], "properties": ["bees buzz, birds chirp, man speaks", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", null], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a car horn honks and keys jangle with background noise "], "question": "which entity is more likely to be heard in a car", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "a car speeding up in the distance"], "sample_ids": ["smGI3C1NZc", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["water, drain, toilet", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is flushed", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", 
"label": 1}, {"captions": ["an adult woman and an adult man speak", "a propeller rotates loudly and intensely"], "sample_ids": ["zTLVJCo4WEE", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["two people, adult, speak", "loud, intense, propeller"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["humming of idling and revving engine with a man speaking", "small dogs yip and bark sharply"], "sample_ids": ["wqADXCzngMw", "v-wcQf4BDY0"], "start_seconds": ["340", "120"], "properties": ["audio, humming, revving", "bark, yip, sharply"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "frogs croak and vocalize"], "sample_ids": ["wwyfGO2J4", "yswmmRZFItk"], "start_seconds": ["90", "0"], "properties": ["people, applaud, hoot", "croak, vocalize, frog"], "captions_pred_video": [null, "a close up of a frog in the water"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a frog is croaking"], "question": "which entity is a frog?", "label": 1}, {"captions": ["a child yells and another yells", "a woman speaks happily and an animal chirps"], "sample_ids": ["vMDHu7Lxcgw", "uWAAAL4CIoc"], "start_seconds": ["410", "0"], "properties": ["two, yell, child", "a woman, chirps, animal"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", null], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a woman is speaking and a dog is barking "], "question": "which entity 
has a more calming effect", "label": 1}, {"captions": ["a child speaks in closed space", "a woman speaks as she rubs two objects together"], "sample_ids": ["yW6FWLSLkx4", "vzxHnu-SFEw"], "start_seconds": ["40", "80"], "properties": ["child, space, speak", "two objects, woman, speak"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is speaking", "label": 1}, {"captions": ["some tunes played by whistling", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["u6BnG6YZqJ4", "tDlysoZiA1I"], "start_seconds": ["0", "0"], "properties": ["tune, play, whistling", "animal, grunts, chirps"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a person whistling a song", "birds are chirping and a rooster is crowing "], 
"question": "which entity is not a musical instrument", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["uOpoD0gGXcs", "vfYTJq7nU"], "start_seconds": ["120", "130"], "properties": ["chirps, woman, bird", "rustling, ducks, quack"], "captions_pred_video": ["a herd of cows grazing in the field", null], "captions_pred_audio": ["birds are chirping and a man is speaking", "a duck quacks and a woman speaks"], "question": "which entity is about birds?", "label": 0}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a car speeding up in the distance"], "sample_ids": ["weDbePuc-Xc", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["music, slaps, human", "distance, car, speed"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", null], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "here comes the train and it starts to blow the horn and get close"], "sample_ids": ["w0xsN8X18Y", "s7knHCFW82w"], "start_seconds": ["30", "30"], "properties": ["rain, thunder, surface", "blow horn, get close, train"], "captions_pred_video": [null, "footage of the train on the tracks near a building and a car parked on the side of the road"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a train is blowing its horn and its wheels are squealing "], "question": "which entity is moving", "label": 1}, {"captions": ["rain falls heavily on a surface and a storm builds in the background with loud thunder", "water flows as men speak 
and yell"], "sample_ids": ["wP8ZKrlx3oA", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["rain, storm, thunder", "water, flow, men"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking and yelling?", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["ul60S8TXDA8", "tdWhHV3X25Q"], "start_seconds": ["60", "60"], "properties": ["sound, distance, bell", "applause, audience, yells"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vSeGhaZt-aI", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["water, bubbles, speak", "a woman, something, fried"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["tEE3MpBt1sg", "zl9Dqx-j7q4"], "start_seconds": ["50", "6"], "properties": ["drill, something, 
laugh", "engine, laugh, loud"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of a man driving a car in the dark"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a jet engine roars "], "question": "which entity is followed by laughter", "label": 1}, {"captions": ["an adult woman and an adult man speak", "a child speaks in closed space"], "sample_ids": ["zTLVJCo4WEE", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["two people, adult, speak", "child, space, speak"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a child speaking?", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["yZmhM1HcsyE", "vfYTJq7nU"], "start_seconds": ["4", "130"], "properties": ["engine, roar, water", "rustling, ducks, quack"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", null], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a duck quacks and a woman speaks"], "question": "which entity is more likely to be in a lake", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "people applaud and hoot and chat quietly"], "sample_ids": ["ujMt0-D-x2k", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["snoring, rhythmical, nearby", "people, applaud, hoot"], "captions_pred_video": ["of the dog playing with a toy on the floor", null], "captions_pred_audio": ["a person is snoring loudly", "people are clapping and speaking with background 
noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["sHbXC6na9hg", "tDlysoZiA1I"], "start_seconds": ["0", "0"], "properties": ["a person, saw, wood", "animal, grunts, chirps"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["an engine is idling and vibrating", "birds are chirping and a rooster is crowing "], "question": "which entity is not a person?", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "a duck quacks continuously"], "sample_ids": ["x5cuQjOdM3E", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["cat, talk, meow", "quacks, continuously, duck"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a cat meows and a woman speaks", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["multiple ducks quack continuously", "a woman speaks as she rubs two objects together"], "sample_ids": ["wfHeoPDLMaM", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["multiple, quack, continuously", "two objects, woman, speak"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn 
ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["ducks are quacking", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "birds chirp and objects are moved around"], "sample_ids": ["wz7N8YRy74I", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["rooster, crow, background, men", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "insects buzz and a man speaks"], "question": "which entity is more active", 
"label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "an engine revs and a turning noise is made"], "sample_ids": ["wPz6QRAkEb4", "tOSWIURC-4"], "start_seconds": ["60", "0"], "properties": ["chirps, tweets, song", "noise, engine, revs"], "captions_pred_video": ["a bird in a cage on top of a pole", null], "captions_pred_audio": ["birds are chirping in the background ", "a lawn mower is running "], "question": "which entity is not a bird?", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["sWZzXuWYY", "tDlysoZiA1I"], "start_seconds": ["420", "0"], "properties": ["male, clanks, thumps", "animal, grunts, chirps"], "captions_pred_video": [null, "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "birds are chirping and a rooster is crowing "], "question": "which entity is more animal", "label": 1}, {"captions": ["a goat bleats as a person speaks", "a frog croaks as other frogs croak in the background"], "sample_ids": ["tPJvjq9QePY", "yswmmRZFItk"], "start_seconds": ["40", "0"], "properties": ["bleats, person, speak", "background, frog, croak"], "captions_pred_video": ["a dog and a sheep in a barn", "a close up of a frog in the water"], "captions_pred_audio": ["a baby cries and a man speaks", "a frog is croaking"], "question": "which animal is more likely to be a frog", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["w-4gHptFNuU", "ukg5L09Wpvo"], "start_seconds": ["21", "150"], "properties": ["engine revs, accelerates, bump", "clickety-clack, train, whistle"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a 
car accelerates and revs its engine ", "a train blows its whistle and blows its horn "], "question": "which entity is a train", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a train engine runs and a horn blows"], "sample_ids": ["vZAw4apG0Es", "zPX9o1uDiI"], "start_seconds": ["30", "40"], "properties": ["background, tick, repeat", "engine, horn, run"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a train moves with its horn blowing and wheels squealing "], "question": "which entity is a train?", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "paper is crumpling consistently"], "sample_ids": ["zl9Dqx-j7q4", "v5cSxLaHADY"], "start_seconds": ["6", "0"], "properties": ["engine, laugh, loud", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a jet engine roars ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "birds chirp and objects are moved around"], "sample_ids": ["vBHyYJ8pL0", "yPUYU6t3rwo"], "start_seconds": ["2", "370"], "properties": ["noise, door, opening", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "someone whistles a tune"], "sample_ids": ["tPJvjq9QePY", "sIXTftIuUgw"], "start_seconds": ["40", "90"], "properties": ["animal, bleat, moo", "someone, tune, 
whistle"], "captions_pred_video": ["a dog and a sheep in a barn", null], "captions_pred_audio": ["a baby cries and a man speaks", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "a woman speaks happily and an animal chirps"], "sample_ids": ["xOZfdgAgJ9o", "uWAAAL4CIoc"], "start_seconds": ["40", "0"], "properties": ["woman, whimpering, speaking", "a woman, chirps, animal"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a dog is barking "], "question": "which entity is more cheerful", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "a person is snoring while sleeping"], "sample_ids": ["t8CV69hcvF0", "vJrjSeP17yE"], "start_seconds": ["210", "40"], "properties": ["person, sneeze, follow", "a person is sleeping, snoring, person"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a woman sneezes and speaks", "a person snoring loudly"], "question": "which person is snoring", "label": 1}, {"captions": ["continuous snoring", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["sLkeqCDJIyw", "vlS6YMeWAPo"], "start_seconds": ["120", "40"], "properties": ["loud, snoring, noise", "sheep, baa, birds"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a person is snoring loudly", "a goat bleats and birds chirp"], "question": "which entity is quieter", "label": 1}, {"captions": ["a car speeding up in the distance", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["u0TrcHhkPQ", "zj2R0XoFr5k"], "start_seconds": ["20", "50"], "properties": ["distance, car, speed", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman speaks while a helicopter flies overhead "], "question": "which object is flying", "label": 0}, {"captions": ["someone is snoring while sleeping", "a car speeding up in the distance"], "sample_ids": ["ujMt0-D-x2k", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["snore, sleep, someone", "distance, car, speed"], "captions_pred_video": ["of the dog playing with a toy on the floor", null], "captions_pred_audio": ["a person is snoring loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "wind blows as people chatter quietly"], "sample_ids": ["u21-Z5gJCB8", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["background, voice, man", "wind, chatter, people"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "an airplane engine spools and people speak"], "sample_ids": ["vzxHnu-SFEw", "wTjoRj1se3U"], "start_seconds": ["80", 
"390"], "properties": ["two objects, woman, speak", "airplane, engine, spool"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a jet engine is running and people are talking"], "question": "which object is moving", "label": 1}, {"captions": ["wind blowing followed by a zoom", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vr8ZXjEBhMQ", "vfYTJq7nU"], "start_seconds": ["150", "130"], "properties": ["wind, blow, zoom", "rustling, ducks, quack"], "captions_pred_video": ["is taken from a motorcycle's point of view", null], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a duck quacks and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "a clock ticks quietly and rhythmically"], "sample_ids": ["tIY7qOV3rEM", "u7C-AEBQM"], "start_seconds": 
["0", "30"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "ticks, rhythmic, quiet"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a ticktock of a clock"], "question": "which entity is quieter", "label": 1}, {"captions": ["wind blows as people chatter quietly", "an infant crying frantically"], "sample_ids": ["xBxDz0CFVn0", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["wind, chatter, people", "cry, infant, frantically"], "captions_pred_video": ["footage is blurry and out of focus", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a baby cries loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["w34HjHr6gAY", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["beeps, hit, woman", "airplane, boy, fly"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a person sniffs and sneezes", "motors rev 
and run loudly as a person laughs"], "sample_ids": ["uRlbY6aoBU", "zl9Dqx-j7q4"], "start_seconds": ["0", "6"], "properties": ["sneezes, person, sniffs", "motors rev, laugh, loudly"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is sneezing ", "a jet engine roars "], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["t25U-v4k4ts", "zj2R0XoFr5k"], "start_seconds": ["40", "50"], "properties": ["a, chirps, bird", "airplane, boy, fly"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about flying?", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a toilet flushes and a female speaks"], "sample_ids": ["wEBlkGWVWwE", "yaln9y8I7ms"], "start_seconds": ["260", "230"], "properties": ["a, babble, woman", "female, flushes, toilet"], "captions_pred_video": ["shows a person writing on the whiteboard", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["a woman speaks and dog vocalizes", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["uWAAAL4CIoc", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["a, dog, vocalize", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a woman is speaking and a 
baby is crying"], "question": "which entity has a dog vocalize?", "label": 0}, {"captions": ["wind blowing and birds chirping with the distant cooing of a large bird", "frogs croak and vocalize"], "sample_ids": ["wRBHTgrbiwg", "yswmmRZFItk"], "start_seconds": ["50", "0"], "properties": ["birds, chirp, cooing", "croak, vocalize, frog"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "a close up of a frog in the water"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a frog is croaking"], "question": "which animal is vocalizing", "label": 1}, {"captions": ["a person is whistling a tune", "a man speaks as a car is passing by"], "sample_ids": ["scYRUkrFLiQ", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a, tune, whistle", "a, car, pass"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 0}, {"captions": ["a baby laughs giddily and a woman laughs then speaks", "a person sneezes followed by another person speaking"], "sample_ids": ["wjsXBsc7M40", "t8CV69hcvF0"], "start_seconds": ["10", "210"], "properties": ["a baby laughs, a woman laughs, a woman speaks", "person, sneeze, follow"], "captions_pred_video": ["footage of the baby playing with a toothbrush", "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman sneezes 
and speaks"], "question": "which entity has a person speaking after a person sneezes?", "label": 1}, {"captions": ["a person is whistling", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sIXTftIuUgw", "yajyRTUQk3U"], "start_seconds": ["90", "400"], "properties": ["person, whistling, person", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person whistling a song", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 0}, {"captions": ["a person is burping then speaks and laughs", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["wAAkbZToh8", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["burp, laugh, speak", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man burps and a woman speaks", "a man is speaking and ducks are quacking"], "question": "which entity is a person?", "label": 0}, {"captions": ["a steam engine runs and whistles as it passes by", "a propeller rotates loudly and intensely"], "sample_ids": ["se87d6yxEOA", "ugHJF0hfYkg"], "start_seconds": ["10", "10"], "properties": ["run, whistle, pass", "loud, intense, propeller"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a person snoring several times", "a woman speaks with water running"], "sample_ids": ["spJCm8tD9Zo", "wTideSjRFS0"], "start_seconds": ["90", "30"], "properties": ["snore, person, several", "water, running, woman"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a woman cooking in a kitchen with a microwave oven"], 
"captions_pred_audio": ["a person is snoring loudly", "a woman is speaking while water is running in the background"], "question": "which entity is a person", "label": 0}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["w8uLijTqtlU", "su6FAOcOA8c"], "start_seconds": ["70", "4"], "properties": ["wind, microphone, noise", "engine, idle, woman"], "captions_pred_video": ["footage is blurry and shaky", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["the wind is blowing strongly", "a woman is speaking and a subway train is moving "], "question": "which entity is a recording", "label": 1}, {"captions": ["a person is burping while a girl speaks", "a man speaks as a car is passing by"], "sample_ids": ["vdoxuJn9lTc", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["person, burp, girl", "a, car, pass"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["someone whistles a tune", "a infant makes noise and is excited"], "sample_ids": ["sIXTftIuUgw", "wIJK3-5y0kA"], "start_seconds": ["90", "30"], "properties": ["someone, tune, whistle", "noise, excited, infant"], "captions_pred_video": [null, "of a baby playing with a cat in a dark room"], 
"captions_pred_audio": ["a person whistling a song", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "an engine starts and increases in power"], "sample_ids": ["u7C-AEBQM", "zjTG0gaGCUI"], "start_seconds": ["30", "80"], "properties": ["ticks, rhythmic, quiet", "power, increase, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a ticktock of a clock", "a jet engine roars as wind blows "], "question": "which is more quiet", "label": 0}, {"captions": ["someone is burping continuously", "a motor idles, accelerates, then slows down."], "sample_ids": ["y636gklDioE", "vYkA3cfXp5Q"], "start_seconds": ["20", "30"], "properties": ["burps, burps, burps", "speed, idle, accelerate"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a person burps loudly several times", "an engine is idling"], "question": "which entity is not a burp", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sQwlkXjQabo", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["water, spray, surface", "stream, water, flow"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage is blurry and out of focus"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a car speeding up in the distance", "a vehicle engine accelerating then running on idle"], "sample_ids": ["u0TrcHhkPQ", "vYkA3cfXp5Q"], "start_seconds": ["20", "30"], "properties": ["distance, car, speed", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a 
race car accelerates and revs its engine ", "an engine is idling"], "question": "which is not a car", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "people speak as gunfire rings out"], "sample_ids": ["uiS58TNyUiw", "wqTCwqVRDlk"], "start_seconds": ["430", "80"], "properties": ["vocalize, bird, chirp", "gunfire, ring, speak"], "captions_pred_video": ["of the pigeon in the cage", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "pigeons vocalize and birds chirp"], "sample_ids": ["zF8yoL0rkbI", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["engine, run, someone", "vocalize, bird, chirp"], "captions_pred_video": ["footage of the traffic on the street at night", "of the pigeon in the cage"], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "an engine sputters followed by a car zooming by"], "sample_ids": ["wSVhSdj0F0", "u5RmF3c3Aw"], "start_seconds": ["10", "60"], "properties": ["beep, clang, footsteps", "engine, car, zoom"], "captions_pred_video": [null, null], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a race car accelerates and skids with wind noise in the background "], "question": "which entity is a car?", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "waves crash against a shoreline and wind blows"], "sample_ids": ["xBxDz0CFVn0", "zdYdyF9-m8U"], "start_seconds": ["30", "7"], "properties": ["stream, water, flow", "wind, crash, shoreline"], "captions_pred_video": ["footage is blurry and out of focus", "a 
person kayaking in the ocean near a cliff"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "waves crash and wind blows "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a toilet flushes and a female speaks"], "sample_ids": ["vZAw4apG0Es", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["background, tick, repeat", "female, flushes, toilet"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage is blurry and out of focus"], "captions_pred_audio": ["a clock is ticking and people are talking", "a toilet flushes and a man speaks"], "question": "which entity has a female speaking?", "label": 1}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "water splashes and a door squeaks"], "sample_ids": ["uZesmtKZGSw", "sdXV-ylviw"], "start_seconds": ["250", "190"], "properties": ["men, talk, cars", "sound, splash, door"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", null], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a dog barks and taps with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "paper is crumpling consistently"], "sample_ids": ["sOa7g-44Dag", "v5cSxLaHADY"], "start_seconds": ["30", "0"], 
"properties": ["background, man, spray", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a whistling owl calls out repeatedly and insects screech", "a man speaks as a car is passing by"], "sample_ids": ["w6RTHR6AeAg", "sK4u5T8hW78"], "start_seconds": ["40", "30"], "properties": ["call, owl, screech", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["an owl hoots and mechanisms operate ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["an infant crying frantically", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zwOBqeFTgiU", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["cry, infant, frantically", "applause, audience, yells"], "captions_pred_video": ["of the baby crying in the car seat", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a baby cries loudly", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "water flows as men speak and yell"], "sample_ids": 
["zO-LSSY92ZM", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["liquid, surface, sound", "water, flow, men"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["steam is hissing and hissing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of water flowing?", "label": 1}, {"captions": ["people speak in a closed space", "vehicles pass by on a roadway"], "sample_ids": ["sTpirNYo8vQ", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["people, space, speak", "pass, vehicle, roadway"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["bees buzz and wind blows", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tMJne1a4AFI", "vb1fPSDI4c"], "start_seconds": ["0", "30"], "properties": ["bees 
buzz, wind blows, bees", "multiple, people, yell"], "captions_pred_video": ["a swarm of bees on the ground", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a door slams shut roughly", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["zkKdxzNC97Y", "uZesmtKZGSw"], "start_seconds": ["27", "250"], "properties": ["a door, slams, shut", "men, talk, cars"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["heavy rain splashes as it falls", "people cheer as a vehicle engine revs"], "sample_ids": ["wP8ZKrlx3oA", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["fall, rain, splash", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a truck is revving its engine and a man is speaking "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["a man speaks as a car is 
passing by", "people speak as gunfire rings out"], "sample_ids": ["sK4u5T8hW78", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["a, car, pass", "gunfire, ring, speak"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "water splashes as an animal walks through"], "sample_ids": ["rwTERCUno", "w1ir-sZ3Im8"], "start_seconds": ["90", "90"], "properties": ["engine, idle, sputter", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["an engine is idling and vibrating", "water splashes and gurgles as people speak"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "water splashes as an animal walks through"], "sample_ids": ["uiS58TNyUiw", "w1ir-sZ3Im8"], "start_seconds": ["430", "90"], "properties": ["audio, man, speaking", "animal, water, splashes"], "captions_pred_video": ["of the pigeon in the cage", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks over a 
running engine and blowing wind", "a man sprays as a scraping occurs in the background"], "sample_ids": ["ylpYOorfH4o", "sOa7g-44Dag"], "start_seconds": ["410", "30"], "properties": ["engine, running, wind", "background, man, spray"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking and rubbing his hands together "], "question": "which entity has a man speaking over a running engine and blowing wind?", "label": 0}, {"captions": ["a machine beeps continuously", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["y682ml90jGw", "uYT5gxnyMWM"], "start_seconds": ["11", "50"], "properties": ["beeps, machine, continuously", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a beeping sound is being made ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["uEU-Hg5MTN8", "tdWhHV3X25Q"], "start_seconds": ["27", "60"], "properties": ["animal, grunts, snorts", "applause, audience, yells"], 
"captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a horn rings out as a machine runs by"], "sample_ids": ["zj2R0XoFr5k", "slZLHwNbbt4"], "start_seconds": ["50", "300"], "properties": ["airplane, boy, fly", "a, horn, run"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["birds chirp as a bell rings", "an electric engine works nearby followed by a child talking"], "sample_ids": ["ziUT9IFTkjg", "xSKJGCItUWE"], "start_seconds": ["10", "10"], "properties": ["chirp, bell, ring", "engine, work, child"], "captions_pred_video": [null, "footage of the helicopter flying in the room"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a high pitched engine is running and a child speaks"], "question": "which entity is a machine", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a man speaks as bees buzz and birds chirp"], "sample_ids": ["sa6TLVbooCc", "t25U-v4k4ts"], "start_seconds": ["240", "40"], "properties": ["people, laugh, child", "bees buzz, birds chirp, man speaks"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a man is speaking and bees are buzzing"], 
"question": "which entity has a child speaking?", "label": 0}, {"captions": ["a motor idles, accelerates, then slows down.", "wind blows and people scream while an engine revs"], "sample_ids": ["vYkA3cfXp5Q", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["speed, idle, accelerate", "wind, engine, scream"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", null], "captions_pred_audio": ["an engine is idling", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a video of a car?", "label": 0}, {"captions": ["frogs croak and vocalize", "an infant crying as a woman laughs"], "sample_ids": ["yswmmRZFItk", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["croak, vocalize, frog", "a, laugh, infant"], "captions_pred_video": ["a close up of a frog in the water", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a frog is croaking", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "a frog vocalizes as birds chirp"], "sample_ids": ["x6ijhqRY38s", "wqUmIEzuNz4"], "start_seconds": ["250", "30"], "properties": ["something metal, glass, hit", "frog, bird, vocalize"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "a frog sitting in the grass on a sunny day"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a cat meows and rustles"], "question": "which entity is a frog?", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "vehicles pass by on a roadway"], "sample_ids": ["xZepNM9qcRA", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["background, motor, run", "pass, vehicle, roadway"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "footage of a fire truck entering a garage"], 
"captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a car is driving on the road "], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "bird squawks are accompanied by a man and woman speaking"], "sample_ids": ["yajyRTUQk3U", "wqZ135Ssz0"], "start_seconds": ["400", "60"], "properties": ["noise, woman, speak", "man, woman, squawks"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has a man and woman speaking?", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "a woman speaks as she rubs two objects together"], "sample_ids": ["rwTERCUno", "vzxHnu-SFEw"], "start_seconds": ["90", "80"], "properties": ["engine, idle, sputter", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["an engine is idling and vibrating", "a woman is speaking and 
breathing with mechanisms in the background "], "question": "which entity is a demonstration of a process", "label": 1}, {"captions": ["a man speaks as a machine runs", "paper is crumpling consistently"], "sample_ids": ["vD6lYD1l0BY", "v5cSxLaHADY"], "start_seconds": ["330", "0"], "properties": ["a, machine, run", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "three men talk while wind blows and some liquid flows"], "sample_ids": ["x6ijhqRY38s", "vJ7JPEFhyLA"], "start_seconds": ["250", "16"], "properties": ["something metal, glass, hit", "three men, wind, flow"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man talking?", "label": 0}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "a stream of water flows as people talk and wind blows"], "sample_ids": ["xNMovAf3o50", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["rain, thunder, music", "stream, water, flow"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "footage is blurry and out of focus"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is flowing", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "a man speaks as insects 
buzz and a bird chirps"], "sample_ids": ["uZesmtKZGSw", "t25U-v4k4ts"], "start_seconds": ["250", "40"], "properties": ["car, track, man", "a, chirps, bird"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking and bees are buzzing"], "question": "which entity has a bird chirp?", "label": 1}, {"captions": ["a person speaks over rustling leaves", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zOZleIRqZm4", "wqZ135Ssz0"], "start_seconds": ["80", "60"], "properties": ["rustling, leaves, person", "two men, woman, birds"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sQwlkXjQabo", "vYkA3cfXp5Q"], "start_seconds": ["10", "30"], "properties": ["liquid, surface, spray", "engine, accelerate, idle"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["spraying 
followed by silence", "an engine is idling"], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["an aircraft engine runs", "small dogs yip and bark sharply"], "sample_ids": ["yLCORCnd35Q", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["engine, aircraft, runs", "bark, yip, sharply"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "multiple people speak and children yell while water gurgles"], "sample_ids": ["ylpYOorfH4o", "vb1fPSDI4c"], "start_seconds": ["410", "30"], "properties": ["engine, running, wind", "multiple, people, yell"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "water splashes as an animal walks through"], "sample_ids": ["wRBHTgrbiwg", "w1ir-sZ3Im8"], "start_seconds": ["50", "90"], "properties": ["bird, owl, speak", 
"animal, water, splashes"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["wz7N8YRy74I", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["rooster, crow, background, people", "engine, revs, vehicle"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["someone whistles briefly", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["uFoga8sHpiw", "su6FAOcOA8c"], "start_seconds": ["90", "4"], "properties": ["sound, duration, pitch", "engine, idle, woman"], "captions_pred_video": ["footage of a bird in a cage", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a person whistles a song", "a woman is speaking and a subway train is moving "], "question": "which entity has a longer duration", "label": 1}, {"captions": ["birds chirp and a pop occurs before a man speaks", "a door opens and birds chirp"], "sample_ids": ["zuua6-5goWw", "yeFvk9x0wWI"], "start_seconds": ["30", "30"], "properties": ["sound, pop, bird", "door, open, birds"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 
10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "birds chirp in the background as a car drives by "], "question": "which entity has birds chirp and a pop occurs before a man speaks?", "label": 0}, {"captions": ["a man sprays as a scraping occurs in the background", "an insect buzzes around continuously"], "sample_ids": ["sOa7g-44Dag", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["background, man, spray", "buzzes, continuously, insect"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["zl9Dqx-j7q4", "vbZ-0lGPneg"], "start_seconds": ["6", "30"], "properties": ["motors rev, laugh, loudly", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a man driving a car in the dark", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a jet engine roars ", "a woman is speaking and a dog is whimpering"], "question": "which entity is more quiet", "label": 1}, {"captions": ["an insect buzzes around continuously", "an engine runs loudly"], "sample_ids": ["v25l1jef3JY", "vqZuVbG6-HI"], 
"start_seconds": ["0", "130"], "properties": ["buzzes, continuously, insect", "loud, engine, run"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ukg5L09Wpvo", "uYT5gxnyMWM"], "start_seconds": ["150", "50"], "properties": ["a train, a horn, a bell", "female, spraying, scream"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking and spraying?", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a child speaks in closed space"], "sample_ids": ["rqfQRErjfk8", "yW6FWLSLkx4"], "start_seconds": ["170", "40"], "properties": ["crowd, cheers, applauds", "child, space, speak"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is more likely to be in a public place", "label": 0}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a group of people chatter and talk as multiple horns honk in the background"], "sample_ids": ["y2bVZ7rz-5M", "yLy-WycbVVE"], "start_seconds": ["280", "30"], "properties": ["motor noise, horn, siren", "background, people, talk"], "captions_pred_video": ["footage of a 
parade of fire trucks driving down the street", "a soccer field in a stadium with yellow and red seats"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking and a church bell is ringing with wind noise in the background "], "question": "which entity has a horn honking?", "label": 0}, {"captions": ["paper is repeatedly crumpled and crinkled", "a child speaks in closed space"], "sample_ids": ["vms5XGTDVQc", "yW6FWLSLkx4"], "start_seconds": ["220", "40"], "properties": ["paper, crumpled, crinkled", "child, space, speak"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["paper is crumpled and crinkled", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is not crumpled and crinkled", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a motorcycle engine is idling"], "sample_ids": ["xBxDz0CFVn0", "vZAqdHZ81yA"], "start_seconds": ["30", "180"], "properties": ["wind, chatter, people", "engine, motorcycle, idling"], "captions_pred_video": ["footage is blurry and out of focus", "a motorcycle is parked on the side of the road with its rear end facing the viewer"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "an engine is idling loudly"], "question": "which is quieter", "label": 1}, {"captions": ["food is frying while a woman speaks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["yhQ2Lg-7qDY", "uYT5gxnyMWM"], "start_seconds": ["130", "50"], "properties": ["food, woman, speak", "a, scream, girl"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a woman is speaking and a baby is crying"], "question": "which entity 
has a scream?", "label": 1}, {"captions": ["a piece of wood is being placed down and sawed", "a motor runs in the distance as a soft wind periodically gusts"], "sample_ids": ["uiItxDsDMFI", "xyL9F5VrjkE"], "start_seconds": ["30", "20"], "properties": ["wood, piece, saw", "wind, motor, distance"], "captions_pred_video": ["a man cutting a log with an axe in the woods", "of a caterpillar truck loading logs into a trailer"], "captions_pred_audio": ["a saw is being used with background noise ", "the wind is blowing and a car is passing by "], "question": "which entity is not a piece of wood?", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a clock ticktocks"], "sample_ids": ["x5cuQjOdM3E", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["cat, meows, young woman", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a black background with an airplane flying in the sky", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a cat meows and a woman speaks", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "paper folding and crinkling"], "sample_ids": ["vddP56-ogds", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["liquid, laughs, man", "paper, fold, crinkle"], "captions_pred_video": [null, "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make 
a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of folding and crinkling?", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "some men converse over an engine running"], "sample_ids": ["w9lpbUn0hPc", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["male, wind, rustling", "men, converse, engine"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", null], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation between men?", "label": 1}, {"captions": ["a clock ticktocks briefly", "a child speaks in closed space"], "sample_ids": ["u7C-AEBQM", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["ticktocks, clock, ticktocks briefly", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a ticktock of a clock", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a drill runs and two people laugh", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["tEE3MpBt1sg", "uEU-Hg5MTN8"], "start_seconds": ["50", "27"], "properties": ["two people, laugh, drill", "a woman, laughs, animal"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["people are laughing 
breathing and speaking with background noise ", "a woman is speaking and a baby is crying"], "question": "which entity has a drill running?", "label": 0}, {"captions": ["a person is burping then speaks and laughs", "an engine revs and a turning noise is made"], "sample_ids": ["wAAkbZToh8", "tOSWIURC-4"], "start_seconds": ["0", "0"], "properties": ["burp, laugh, speak", "noise, engine, revs"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man burps and a woman speaks", "a lawn mower is running "], "question": "which entity is not a noise?", "label": 0}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["y2bVZ7rz-5M", "wz7N8YRy74I"], "start_seconds": ["280", "30"], "properties": ["motor noise, horn, siren", "rooster, crow, background, men"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["uYT5gxnyMWM", "xfaoyyzw2WU"], "start_seconds": ["50", "180"], "properties": ["female, spraying, scream", "loud, jet engine, roar"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a clock ticktocks briefly", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["u7C-AEBQM", "uZesmtKZGSw"], "start_seconds": ["30", 
"250"], "properties": ["ticktocks, clock, ticktocks briefly", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["birds tweet and squawk", "water pouring and bubbling"], "sample_ids": ["w1mlz3Pe4fU", "uyRfq-jKPpo"], "start_seconds": ["300", "50"], "properties": ["squawk, tweet, scream", "water, bubbles, pouring"], "captions_pred_video": ["of a bird in a cage", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["birds are chirping and singing", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a 
man rubs two objects together then speaks", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vveS8HT7Uog", "uYT5gxnyMWM"], "start_seconds": ["100", "50"], "properties": ["a man, objects, speak", "female, spraying, scream"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a toilet flushes and a female speaks", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["yaln9y8I7ms", "yDoT73BWsdA"], "start_seconds": ["230", "10"], "properties": ["female, flushes, toilet", "engine, revs, vehicle"], "captions_pred_video": ["footage is blurry and out of focus", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["wind blows and a stream of water flows nearby", "paper folding and crinkling"], "sample_ids": ["sYITalLZjj4", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["stream, flow, wind", "paper, fold, crinkle"], "captions_pred_video": ["two ducks are swimming in the water near each other", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card 
out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["wind blows and birds chirp", "the wind blows and a mouse clicks "], "question": "which entity is not a stream of water", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "an airplane engine runs"], "sample_ids": ["v0x1odnXtP0", "yVPZ2MNWpms"], "start_seconds": ["210", "0"], "properties": ["keyboard, type, computer", "engine, airplane, runs"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a person is typing on a keyboard", "a car is driving by on the road "], "question": "which is a moving object", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "people speak softly as food sizzles"], "sample_ids": ["s4Uz1Ffgo04", "yhQ2Lg-7qDY"], "start_seconds": ["100", "130"], "properties": ["roars, background, people speaking", "food, sizzle, speak"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a faucet is running and a man is speaking"], "question": "which entity is quieter", "label": 1}, {"captions": ["a person snoring several times", "a car speeding up in the distance"], "sample_ids": ["spJCm8tD9Zo", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["snore, person, several", "distance, car, speed"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a race car 
accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "some men converse over an engine running"], "sample_ids": ["vlS6YMeWAPo", "sCiy7QS1U"], "start_seconds": ["40", "300"], "properties": ["noise, bleat, call", "men, converse, engine"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", null], "captions_pred_audio": ["a goat bleats and birds chirp", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a conversation?", "label": 1}, {"captions": ["a small engine idles continuously", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["y5WII6cTH7k", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["engine, idle, continuously", "female, spraying, scream"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a woman is speaking and a baby is crying"], "question": "which entity is not spraying?", "label": 0}, {"captions": ["a dog barks and whimpers", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["sShpyu2l4YQ", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "a woman, a television program, a bird"], "captions_pred_video": ["the puppies are playing with a toy", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking and a dog is whimpering"], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a clock ticktocks"], "sample_ids": ["uWPRNLnpy7Y", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["accelerate, laugh, vehicle", "ticktocks, clock, ticktocks"], "captions_pred_video": ["is 
taken from a car driving down the street", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "a car accelerates and wind blows"], "sample_ids": ["vbpKkWvfOu4", "u0TrcHhkPQ"], "start_seconds": ["560", "20"], "properties": ["a, woman, man", "accelerates, wind, blows"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "water is sprayed across a hard surface"], "sample_ids": ["tGcFnX0GHI", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["ring, talk, woman", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["several insects fly while two men talk", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["s-T9OVOiMLo", "zj2R0XoFr5k"], "start_seconds": ["330", "50"], "properties": ["several, fly, men", "airplane, boy, fly"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a 
woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a woman and man are speaking", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vbpKkWvfOu4", "xfaoyyzw2WU"], "start_seconds": ["560", "180"], "properties": ["two people, speaking, woman, man", "loud, jet engine, roar"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["goats bleat and people speak", "a propeller rotates loudly and intensely"], "sample_ids": ["z5iUE5h0EPs", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["goats bleat, people speak, language", "loud, intense, propeller"], "captions_pred_video": ["of the goat in the barn", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a goat bleats and a man speaks", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "long loud burping by a man"], "sample_ids": ["zsLxS-uLJTw", "xmiUIOhtZyQ"], "start_seconds": ["20", "60"], "properties": ["horn, blast, train", "loud, burp, man"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "homer simpson drinking a beer"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a person burps and music plays in the background "], "question": "which is louder", "label": 0}, {"captions": ["water bubbles and gurgles.", "an aircraft engine runs"], "sample_ids": ["tB7hWb9gTuQ", "yLCORCnd35Q"], "start_seconds": 
["30", "0"], "properties": ["bubbles, gurgles, water", "engine, aircraft, runs"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "a lufthansa airbus a380 landing at london's heathrow airport"], "captions_pred_audio": ["water is splashing and gurgling", "a train is moving and its wheels are squealing "], "question": "which entity is a moving object", "label": 1}, {"captions": ["people speak as gunfire rings out", "water runs into a sink while men speak"], "sample_ids": ["wqTCwqVRDlk", "vzceMbklWc"], "start_seconds": ["80", "180"], "properties": ["gunfire, ring, speak", "water, sink, run"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", null], "captions_pred_audio": ["a man is speaking and a gun is fired", "water is running and a man is speaking"], "question": "which entity is more calm", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "birds twitter and chirp and clatter"], "sample_ids": ["sWZzXuWYY", "yeFvk9x0wWI"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "chirp, twitter, clatter"], "captions_pred_video": [null, "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "birds chirp in the background as a car drives by "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man talks as several small engines run", "people cheer as a vehicle engine revs"], "sample_ids": ["u9A6VZQCZpU", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["a, man, talk", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a dark 
barks and whimpers", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["sYj4hpDUZDQ", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["barks, whimpers, dark", "a woman, a television program, a bird"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a dog barks and a cat meows", "a woman is speaking and a dog is whimpering"], "question": "which entity is more active", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "people cheer as a vehicle engine revs"], "sample_ids": ["tqR406bGiE", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["flush, water, gurgle", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a toilet is flushed", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a motorcycle engine is idling", "small dogs yip and bark sharply"], "sample_ids": ["vZAqdHZ81yA", "v-wcQf4BDY0"], "start_seconds": ["180", "120"], "properties": ["engine, motorcycle, idling", "bark, yip, sharply"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["an engine is idling loudly", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "an infant crying as a woman laughs"], "sample_ids": ["wtDqrBygTcU", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["man, engine, run", "a, laugh, infant"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "of a baby crying 
in a baby bouncer"], "captions_pred_audio": ["a man is speaking and a motor is running", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a stream of water runs briefly"], "sample_ids": ["vSeGhaZt-aI", "x-PeY8Yb8M4"], "start_seconds": ["50", "300"], "properties": ["water, sink, talk", "stream, water, run"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "a man speaks as a motor runs in the background"], "sample_ids": ["xM4joTqDVp4", "xZepNM9qcRA"], "start_seconds": ["160", "30"], "properties": ["background, chirp, birds", "background, motor, run"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a dog barks and whimpers", "a child speaks in closed space"], "sample_ids": ["sShpyu2l4YQ", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["barks, whimpers, dog", "child, space, speak"], "captions_pred_video": ["the puppies are playing with a toy", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["cats meow and then a person 
begins to talk while the cats continue to meow", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["x5cuQjOdM3E", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["cat, talk, meow", "a woman, laughs, animal"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a cat", "label": 0}, {"captions": ["two men speak as a buffeting wind blows", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["y8WEcpOlT3I", "w5W5Kqtc8E"], "start_seconds": ["40", "100"], "properties": ["wind, speak, buffeting", "wind, blow, vehicle"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blows before women yell?", "label": 1}, {"captions": ["a man is filing a hard object", "paper folding and crinkling"], "sample_ids": ["vveS8HT7Uog", "zPpG3RD8lSs"], "start_seconds": ["100", "20"], "properties": ["a man, hard, object", "paper, fold, crinkle"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper 
youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "the wind blows and a mouse clicks "], "question": "which object is being filed", "label": 0}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "a man speaks as a car is passing by"], "sample_ids": ["s59PfAghdkM", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["bird, chirp, background, horse, neigh", "a, car, pass"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a horse in it?", "label": 0}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "people cheer as a vehicle engine revs"], "sample_ids": ["tQWGZLItBXk", "xjhAnI2q6hM"], "start_seconds": ["170", "6"], "properties": ["voice, music, whoosh", "engine revs, vehicle, people"], "captions_pred_video": ["worms revolution screenshots", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a child 
speaks music plays video game sounds sound effects and sound effects play ", "a truck is revving its engine and a man is speaking "], "question": "which entity has more people", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "a woman speaks as she rubs two objects together"], "sample_ids": ["zhx6hoYrHeI", "vzxHnu-SFEw"], "start_seconds": ["160", "80"], "properties": ["engine, sputter, rough", "two objects, woman, speak"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is not a person speaking?", "label": 0}, {"captions": ["white noise and birds chirping", "wind blows as people chatter quietly"], "sample_ids": ["wRBHTgrbiwg", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["noise, white, chirping", "wind, chatter, people"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are 
chirping and insects are buzzing", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "a man speaks while turning a water faucet on"], "sample_ids": ["xvDdE3zNf8Y", "vf9xf3vMsGM"], "start_seconds": ["120", "540"], "properties": ["a, female, speaks", "A man speaks while turning a water faucet on."], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "of the person washing their hands under the faucet"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking while water is running in the background"], "question": "which entity is a man", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "an airplane accelerates briefly"], "sample_ids": ["uiS58TNyUiw", "zjTG0gaGCUI"], "start_seconds": ["430", "80"], "properties": ["audio, man, speaking", "accelerates, airplane, briefly"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a jet engine roars as wind blows "], "question": "which is a moving object", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a man speaks while water trickles and flows"], "sample_ids": ["tDVADusiIoc", "sapQIQUhFc"], "start_seconds": ["60", "280"], "properties": ["wind, radio, waves", "water, trickles, flow"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and a stream is flowing in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["bees buzz and wind blows", "a door opens and closes"], "sample_ids": ["tMJne1a4AFI", "vBHyYJ8pL0"], "start_seconds": ["0", "2"], "properties": ["bees buzz, wind blows, 
bees", "open, close, door"], "captions_pred_video": ["a swarm of bees on the ground", null], "captions_pred_audio": ["a swarm of bees buzzing around", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is not a door?", "label": 0}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vlS6YMeWAPo", "zj2R0XoFr5k"], "start_seconds": ["40", "50"], "properties": ["sheep, baa, birds", "airplane, boy, fly"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a goat bleats and birds chirp", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a woman speaks with water running", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["wTideSjRFS0", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["water, running, woman", "two men, woman, birds"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["water bubbles and gurgles.", "an infant crying as a woman laughs"], "sample_ids": ["tB7hWb9gTuQ", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["bubbles, gurgles, water", "a, laugh, infant"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["water is splashing and gurgling", "a baby cries and a woman speaks"], "question": "which 
entity is a person", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "children speak and play together"], "sample_ids": ["zcDwZ6W7E3E", "yVVP8XvWJTo"], "start_seconds": ["180", "260"], "properties": ["a, man, speak", "children, speak, play"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a playground at a school or daycare center"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "children are speaking and breathing with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a stream of water flows as people talk and wind blows", "a woman speaks as she rubs two objects together"], "sample_ids": ["xBxDz0CFVn0", "vzxHnu-SFEw"], "start_seconds": ["30", "80"], "properties": ["stream, water, flow", "two objects, woman, speak"], "captions_pred_video": ["footage is blurry and out of focus", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and breathing with 
mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "someone whistles a tune"], "sample_ids": ["zsLxS-uLJTw", "sIXTftIuUgw"], "start_seconds": ["20", "90"], "properties": ["horn, blast, train", "someone, tune, whistle"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", null], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a person whistling a song"], "question": "which is a musical instrument", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["zFjIWfSD-4", "w34HjHr6gAY"], "start_seconds": ["410", "30"], "properties": ["People, motor, brakes", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a man talks as several small engines run", "a toilet flushes and water drains"], "sample_ids": ["u9A6VZQCZpU", "sfAvvZwdLCY"], "start_seconds": ["30", "20"], "properties": ["a, man, talk", "water drains, flushes, water"], "captions_pred_video": [null, "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a 
toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["bees buzz and wind blows", "paper is crumpling consistently"], "sample_ids": ["tMJne1a4AFI", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["bees buzz, wind blows, bees", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a swarm of bees on the ground", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a swarm of bees buzzing around", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["wind blows as people chatter quietly", "an airplane accelerates briefly"], "sample_ids": ["xBxDz0CFVn0", "zjTG0gaGCUI"], "start_seconds": ["30", "80"], "properties": ["wind, chatter, people", "accelerates, airplane, briefly"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a jet engine roars as wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["a stream of water flows quickly", "birds chirp and objects are moved around"], "sample_ids": ["wbHTKEJZyhc", "yPUYU6t3rwo"], "start_seconds": ["20", "370"], "properties": ["stream, water, flow", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in 
autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["yajyRTUQk3U", "yks4cLgIDMc"], "start_seconds": ["400", "170"], "properties": ["a woman, something, fried", "background, speaking, child"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background?", "label": 1}, {"captions": ["a machine runs continuously", "frogs croak and vocalize"], "sample_ids": ["wdXV3Pv0jiY", "yswmmRZFItk"], "start_seconds": ["11", "0"], "properties": ["machine, running, continuously", "croak, vocalize, frog"], "captions_pred_video": ["footage is blurry and shaky", "a close up of a frog in the water"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a frog is croaking"], "question": "which entity is not a machine?", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yZrFNS7GFBQ", "zFjIWfSD-4"], "start_seconds": 
["30", "410"], "properties": ["pigeon, buzzes, insect", "People, motor, brakes"], "captions_pred_video": ["of the bird in the cage", null], "captions_pred_audio": ["an owl hoots in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "a clock ticktocks"], "sample_ids": ["xM4joTqDVp4", "v-g-j2uTByM"], "start_seconds": ["160", "30"], "properties": ["background, chirp, birds", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "wind blowing followed by a zoom"], "sample_ids": ["slZLHwNbbt4", "vr8ZXjEBhMQ"], "start_seconds": ["300", "150"], "properties": ["clap, distance, horn", "wind, blow, zoom"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["a railroad crossing bell rings as a train horn blows", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["tZGN5a7ybxo", "wz7N8YRy74I"], "start_seconds": ["60", "30"], "properties": ["ring, train, horn", "rooster, crow, background, men"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a train 
is moving and blowing its horn ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to a rooster?", "label": 1}, {"captions": ["a goat bleats as a person speaks", "water flows and trickles"], "sample_ids": ["tPJvjq9QePY", "tB7hWb9gTuQ"], "start_seconds": ["40", "30"], "properties": ["bleats, person, speak", "water, flow, trickle"], "captions_pred_video": ["a dog and a sheep in a barn", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a baby cries and a man speaks", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["children speak and play together", "multiple people speak and children yell while water gurgles"], "sample_ids": ["yVVP8XvWJTo", "vb1fPSDI4c"], "start_seconds": ["260", "30"], "properties": ["children, speak, play", "multiple, people, yell"], "captions_pred_video": ["footage of a playground at a school or daycare center", null], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "an engine runs loudly"], "sample_ids": ["ukxt9I7eMMg", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["continuous, woman, speaking", "loud, engine, run"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "a man speaks followed by another man speaking outside"], "sample_ids": ["wqUmIEzuNz4", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": 
["frog, bird, vocalize", "two men, speak, follow"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a cat meows and rustles", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a single speaker?", "label": 0}, {"captions": ["vehicles pass by on a roadway", "water splashing and a person laughs in the distance then a man speaks nearby"], "sample_ids": ["tgbONvsP47Y", "vddP56-ogds"], "start_seconds": ["0", "30"], "properties": ["pass, vehicle, roadway", "water, splash, person, laugh"], "captions_pred_video": ["footage of a fire truck entering a garage", null], "captions_pred_audio": ["a car is driving on the road ", "water is running and gurgling and a man is speaking"], "question": "which entity is more active", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["weDbePuc-Xc", "y8WEcpOlT3I"], "start_seconds": ["40", "40"], "properties": ["music, slaps, human", "harsh, wind, blows"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a man is speaking with wind noise in the background "], "question": "which entity has a harsh wind blowing?", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "wind blowing followed by a zoom"], "sample_ids": ["uJV8NDaHqqk", "vr8ZXjEBhMQ"], "start_seconds": ["100", "150"], "properties": ["loud, fly, chirp", "wind, blow, zoom"], "captions_pred_video": ["a bee hive in a wooden box", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a swarm of bees buzzing around", "wind blows and a chainsaw cuts through wood "], "question": 
"which entity is not loud", "label": 1}, {"captions": ["children speak and play together", "winds blows roughly as a vehicle races past"], "sample_ids": ["yVVP8XvWJTo", "xjvTpk2Zpr8"], "start_seconds": ["260", "70"], "properties": ["children, speak, play", "wind, blows, vehicle"], "captions_pred_video": ["footage of a playground at a school or daycare center", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a motor vehicle roars, drowning out people speaking in the background"], "sample_ids": ["ugHJF0hfYkg", "s4Uz1Ffgo04"], "start_seconds": ["10", "100"], "properties": ["engine, running, continuously", "roars, background, people speaking"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["an adult woman and an adult man speak", "wind noise makes sound into a microphone"], "sample_ids": ["zTLVJCo4WEE", "w8uLijTqtlU"], "start_seconds": ["30", "70"], "properties": ["two people, adult, speak", "wind, microphone, noise"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "footage is blurry and shaky"], "captions_pred_audio": ["a woman speaks and crickets chirp", "the wind is blowing strongly"], "question": "which is not a person", "label": 1}, {"captions": ["an engine runs loudly", "a man speaks then multiple motorcycles pass by"], "sample_ids": ["vqZuVbG6-HI", "zcDwZ6W7E3E"], "start_seconds": ["130", "180"], "properties": ["loud, engine, run", "a, man, speak"], "captions_pred_video": ["footage is blurry because it's 
raining outside", "2 people riding motorcycles down a mountain road with trees lining the sides of the road"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man is speaking while a car accelerates and revs its engine "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["xfudFO976zE", "xfaoyyzw2WU"], "start_seconds": ["0", "180"], "properties": ["animal, bleats, cry", "loud, jet engine, roar"], "captions_pred_video": ["footage is blurry and shaky", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yVumC9TGknc", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["humming, clock, birds", "female, spraying, scream"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a series of beeps and chirps", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vb1fPSDI4c", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["multiple, people, yell", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a woman is speaking and a subway train is moving "], "question": "which entity has a 
woman speaking?", "label": 1}, {"captions": ["birds chirp as a bell rings", "winds blows roughly as a vehicle races past"], "sample_ids": ["ziUT9IFTkjg", "xjvTpk2Zpr8"], "start_seconds": ["10", "70"], "properties": ["chirp, bell, ring", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a jet engine roars and wind blows "], "question": "which entity is more likely to be a natural occurrence", "label": 0}, {"captions": ["a person is whistling", "pigeons vocalize and birds chirp"], "sample_ids": ["sIXTftIuUgw", "uiS58TNyUiw"], "start_seconds": ["90", "430"], "properties": ["person, whistling, person", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a person whistling a song", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person?", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "dishes cling together then a man begins to speak"], "sample_ids": ["vf9xf3vMsGM", "sQGXqGcwOTc"], "start_seconds": ["540", "3"], "properties": ["A man speaks while turning a water faucet on.", "cling, speak, dishes"], "captions_pred_video": ["of the person washing their hands under the faucet", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking while water is running in the background", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a male speaks over some small clicks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["uXxVebHsGZ8", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["male, clicks, speak", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a man is speaking and ducks are quacking with 
wind noise in the background "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "water splashes and a door squeaks"], "sample_ids": ["tqR406bGiE", "sdXV-ylviw"], "start_seconds": ["40", "190"], "properties": ["flush, water, gurgle", "sound, splash, door"], "captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is flushed", "a dog barks and taps with background noise "], "question": "which entity has a door that squeaks?", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "a man speaks as birds chirp and a vehicle passes nearby"], "sample_ids": ["zj2R0XoFr5k", "siJFXfGWgDk"], "start_seconds": ["50", "50"], "properties": ["airplane, fly, overhead", "a, bird, vehicle"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a man is speaking and birds are chirping in the background "], "question": "which entity is flying overhead", "label": 0}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["yLy-WycbVVE", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["background, people, talk", "gun, shoot, water"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["water rushes by", "an insect buzzes around continuously"], "sample_ids": ["x-PeY8Yb8M4", "v25l1jef3JY"], "start_seconds": ["300", "0"], 
"properties": ["water, rushes, by", "buzzes, continuously, insect"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a car is driving on a wet road ", "a fly is buzzing around a microphone "], "question": "which entity is moving faster", "label": 0}, {"captions": ["a beep repeats multiple times", "a car accelerates and wind blows"], "sample_ids": ["y682ml90jGw", "u0TrcHhkPQ"], "start_seconds": ["11", "20"], "properties": ["beep, repeat, multiple", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a dark barks and whimpers", "a man speaks as a car is passing by"], "sample_ids": ["sYj4hpDUZDQ", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["barks, whimpers, dark", "a, car, pass"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a dog barks and a cat meows", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["water gurgles, metal squeaks and the water stops", "a person snores loudly multiple times at a close distance"], "sample_ids": ["x4a9YGIw4ok", "sSMl2vc3ek"], "start_seconds": ["120", "20"], "properties": ["water, gurgles, stops", "loud, multiple, distance"], "captions_pred_video": 
["footage is blurry and out of focus", null], "captions_pred_audio": ["a toilet flushes and water splashes", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "an adult woman speaks over chopping and silverware noises"], "sample_ids": ["xOZfdgAgJ9o", "yYJksgsxx5U"], "start_seconds": ["40", "30"], "properties": ["woman, whimpering, speaking", "audio, woman, silverware"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "of a woman slicing an orange on a cutting board"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and dishes are clanging in the background "], "question": "which woman is speaking", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wAAkbZToh8", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["burp, laugh, speak", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man burps and a woman speaks", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 0}, {"captions": ["a man speaks while a vehicle engine runs and revs loudly", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["ylpYOorfH4o", "wz7N8YRy74I"], "start_seconds": ["410", "30"], "properties": ["engine, run, loud", "rooster, crow, background, men"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 
dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a man speaking to?", "label": 0}, {"captions": ["several insects fly while two men talk", "water flows as men speak and yell"], "sample_ids": ["s-T9OVOiMLo", "vJ7JPEFhyLA"], "start_seconds": ["330", "16"], "properties": ["several, fly, men", "water, flow, men"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows men speaking and yelling?", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "birds chirp and objects are moved around"], "sample_ids": ["wTideSjRFS0", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["food, sizzle, woman", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "insects buzz and a man speaks"], "question": "which entity is about birds?", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "some tunes played by whistling"], "sample_ids": ["s4Uz1Ffgo04", "u6BnG6YZqJ4"], "start_seconds": ["100", "0"], "properties": ["roars, background, people speaking", 
"tune, play, whistling"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a person whistling a song"], "question": "which entity is played by whistling", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["xjvTpk2Zpr8", "zFjIWfSD-4"], "start_seconds": ["70", "410"], "properties": ["engine, run, wind", "People, motor, brakes"], "captions_pred_video": ["footage of a dhl plane landing on the runway", null], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is running", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "a motor noise is accompanied by a door opening and closing"], "sample_ids": ["xyL9F5VrjkE", "vBHyYJ8pL0"], "start_seconds": ["20", "2"], "properties": ["wind, blows, vehicle", "noise, door, opening"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", null], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is accompanied by a door opening and closing?", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "women speak as water runs briefly, children call out, and a man speaks"], "sample_ids": ["wTideSjRFS0", "uRExseg-0XI"], "start_seconds": ["30", "210"], "properties": ["food, sizzle, woman", "woman, man, water"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon"], "captions_pred_audio": ["a woman is speaking while water is running 
in the background", "a man is speaking while water is running and birds are chirping "], "question": "which entity has a man speaking?", "label": 1}, {"captions": ["male speech with light ticking", "a man speaks with another voice speaking in the background"], "sample_ids": ["xO-Q2BlIIPU", "u21-Z5gJCB8"], "start_seconds": ["30", "30"], "properties": ["male, speech, ticking", "background, voice, man"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "- a person cooking eggs in a pan on the stove"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking with another voice speaking in the background?", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "water splashes as an animal walks through"], "sample_ids": ["vZAw4apG0Es", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["background, tick, repeat", "animal, water, splashes"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a clock is ticking and people are talking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["small dogs yip and bark sharply", "a baby laugh at a sputter"], "sample_ids": ["v-wcQf4BDY0", "sLUnaPT5gM8"], "start_seconds": ["120", "0"], "properties": ["bark, yip, sharply", "laugh, sputter, baby"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a dog barks and growls", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more likely to be a child", "label": 1}, {"captions": ["a infant 
makes noise and is excited", "water pouring and bubbling"], "sample_ids": ["wIJK3-5y0kA", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["noise, excited, infant", "water, bubbles, pouring"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a baby cries and a woman speaks", "water is running from a faucet"], "question": "which entity is bubbling", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "an infant crying frantically"], "sample_ids": ["wAAkbZToh8", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["burp, laugh, speak", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a man burps and a woman speaks", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wsHBIgzs9Fs", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["horn, continuous, buzzing", "three men, wind, flow"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "a man 
in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a duck quacks continuously"], "sample_ids": ["tDlysoZiA1I", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["animal, grunt, chirp", "quacks, continuously, duck"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a duck is quacking loudly"], "question": "which animal is speaking", "label": 1}, {"captions": ["goats bleat and people speak", "a machine beeps continuously"], "sample_ids": ["z5iUE5h0EPs", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["goats bleat, people speak, language", "beeps, machine, continuously"], "captions_pred_video": ["of the goat in the barn", null], "captions_pred_audio": ["a goat bleats and a man speaks", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks as a machine runs", "water flows as men speak and yell"], "sample_ids": ["vD6lYD1l0BY", "vJ7JPEFhyLA"], "start_seconds": ["330", "16"], "properties": ["a, machine, run", "water, flow, men"], "captions_pred_video": ["game controller being held in the hands of the person", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a man speaking as a machine runs?", "label": 0}, {"captions": ["a horn blasts loudly as a train passes", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zsLxS-uLJTw", "wDVMhEdTiVw"], "start_seconds": ["20", "30"], 
"properties": ["horn, blast, train", "gun, shoot, water"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause a train to pass", "label": 0}, {"captions": ["a man speaks while turning a water faucet on", "a man speaks over intermittent keyboard taps"], "sample_ids": ["vf9xf3vMsGM", "tw76HGONaKg"], "start_seconds": ["540", "570"], "properties": ["A man speaks while turning a water faucet on.", "audio, man, keyboard"], "captions_pred_video": ["of the person washing their hands under the faucet", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a man speaks and types on a computer keyboard "], "question": "which entity is a video?", "label": 0}, {"captions": ["a vehicle engine revs and tires squeal", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yDoT73BWsdA", "vbZ-0lGPneg"], "start_seconds": ["10", "30"], 
"properties": ["engine revs, tires squeal, vehicle", "a woman, a television program, a bird"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a man speaks over a running engine and blowing wind", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["ylpYOorfH4o", "ukg5L09Wpvo"], "start_seconds": ["410", "150"], "properties": ["engine, running, wind", "clickety-clack, train, whistle"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "water splashes as an animal walks through"], "sample_ids": ["tPJvjq9QePY", "w1ir-sZ3Im8"], "start_seconds": ["40", "90"], "properties": ["animal, bleat, moo", "animal, water, splashes"], "captions_pred_video": ["a dog and a sheep in a barn", "footage of a group of people riding horses through a river"], "captions_pred_audio": 
["a baby cries and a man speaks", "water splashes and gurgles as people speak"], "question": "which animal is more active", "label": 1}, {"captions": ["an engine runs and wind blows", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vs65y4qmyBE", "vbZ-0lGPneg"], "start_seconds": ["340", "30"], "properties": ["engine, run, wind", "a woman, a television program, a bird"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a person", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["spJCm8tD9Zo", "uEU-Hg5MTN8"], "start_seconds": ["90", "27"], "properties": ["snores, wheezes, sleeps", "animal, grunts, snorts"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 0}, {"captions": ["an animal growls followed by birds chirping", "people cheer as a vehicle engine revs"], "sample_ids": ["y2ZBGpgbhHM", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["animal, growl, bird", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["birds chirping and a dog panting", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["skd2PphS6oI", "zj2R0XoFr5k"], 
"start_seconds": ["190", "50"], "properties": ["ring, bird, vocalize", "airplane, boy, fly"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a woman and man are speaking", "paper folding and crinkling"], "sample_ids": ["vbpKkWvfOu4", "zPpG3RD8lSs"], "start_seconds": ["560", "20"], "properties": ["two people, speaking, woman, man", "paper, fold, crinkle"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "the wind blows and a mouse clicks "], "question": "which is not a person", "label": 1}, {"captions": ["birds chirp quietly 
and an adult man speaks", "a duck quacks loudly and continuously"], "sample_ids": ["zuua6-5goWw", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["birds, chirp, quiet, man, speaks", "loud, continuous, quacks"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 0}, {"captions": ["a large bell chimes back and forth loudly", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["w2M4i1mklOA", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["loud, chime, bell", "background, birds, rustling"], "captions_pred_video": ["footage of an antique clock", null], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "birds are chirping and a chime is ringing "], "question": "which entity is quieter", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["tQWGZLItBXk", "wSVhSdj0F0"], "start_seconds": ["170", "10"], "properties": ["voice, music, whoosh", "horn honks, keys jingle, electronic beep"], "captions_pred_video": ["worms revolution screenshots", 
null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a car horn honks and keys jangle with background noise "], "question": "which entity has a horn honk?", "label": 1}, {"captions": ["a man talks as several small engines run", "a woman speaks and other women and a man talk with her"], "sample_ids": ["u9A6VZQCZpU", "vbpKkWvfOu4"], "start_seconds": ["30", "560"], "properties": ["a, man, talk", "a, woman, man"], "captions_pred_video": [null, "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["a beep repeats multiple times", "people applaud and hoot and chat quietly"], "sample_ids": ["y682ml90jGw", "wwyfGO2J4"], "start_seconds": ["11", "90"], "properties": ["beep, repeat, multiple", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "an engine runs loudly"], "sample_ids": ["wnpJndXuxLc", "vqZuVbG6-HI"], "start_seconds": ["50", "130"], "properties": ["beeps, loud, whistle", "loud, engine, run"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": 
["popping and crackling repeats as men yell and laugh", "a man speaks in the background while a slow tick repeats"], "sample_ids": ["rqu8iB22IY", "vZAw4apG0Es"], "start_seconds": ["5", "30"], "properties": ["sound, repeats, laugh", "background, tick, repeat"], "captions_pred_video": [null, "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a clock is ticking and people are talking"], "question": "which entity has a tick repeating in the background?", "label": 1}, {"captions": ["children speak and play together", "people cheer as a vehicle engine revs"], "sample_ids": ["yVVP8XvWJTo", "xjhAnI2q6hM"], "start_seconds": ["260", "6"], "properties": ["children, speak, play", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a playground at a school or daycare center", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a machine beeps continuously", "some men converse over an engine running"], "sample_ids": ["y682ml90jGw", "sCiy7QS1U"], "start_seconds": ["11", "300"], "properties": ["beeps, machine, continuously", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a beeping sound is being made ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "wind blowing followed by a zoom"], "sample_ids": ["wDVMhEdTiVw", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["gun, shoot, water", "wind, blow, zoom"], "captions_pred_video": ["a blurry image of trees and water in the forest", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a gun is 
fired followed by splashing and a person sneezing", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vuUVPzd2FXw", "uZesmtKZGSw"], "start_seconds": ["160", "250"], "properties": ["a, steam, release", "men, talk, cars"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about cars?", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tDVADusiIoc", "wqZ135Ssz0"], "start_seconds": ["60", "60"], "properties": ["water, radio, man", "two men, woman, birds"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a dark barks and whimpers", "a child speaks in closed space"], "sample_ids": ["sYj4hpDUZDQ", "yW6FWLSLkx4"], 
"start_seconds": ["30", "40"], "properties": ["barks, whimpers, dark", "child, space, speak"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a dog barks and a cat meows", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["zj2R0XoFr5k", "vbZ-0lGPneg"], "start_seconds": ["50", "30"], "properties": ["airplane, fly, overhead", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a bird?", "label": 1}, {"captions": ["an adult woman and an adult man speak", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["zTLVJCo4WEE", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["two people, adult, speak", "two men, woman, birds"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["su6FAOcOA8c", "uEU-Hg5MTN8"], "start_seconds": ["4", "27"], "properties": ["engine, idle, woman", "animal, grunts, snorts"], "captions_pred_video": ["shows a group of people riding on a crowded subway 
train", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a woman is speaking and a baby is crying"], "question": "which entity is a person speaking to an animal?", "label": 1}, {"captions": ["water running down a sink while a man is talking", "water flows as men speak and yell"], "sample_ids": ["vSeGhaZt-aI", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["water, sink, talk", "water, flow, men"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more water flowing", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["s59PfAghdkM", "vfYTJq7nU"], "start_seconds": ["0", "130"], "properties": ["bird, chirp, background, horse, neigh", "rustling, ducks, quack"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", null], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 0}, {"captions": ["birds chirp and objects are moved around", "winds blows roughly as a vehicle races past"], "sample_ids": ["yPUYU6t3rwo", "xjvTpk2Zpr8"], "start_seconds": ["370", "70"], "properties": ["birds chirp, objects are moved around, birds", "wind, blows, vehicle"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "footage of a dhl plane landing on the runway"], 
"captions_pred_audio": ["insects buzz and a man speaks", "a jet engine roars and wind blows "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["wIvYjuR3nrg", "zj2R0XoFr5k"], "start_seconds": ["9", "50"], "properties": ["birds, pigeons, vocalize", "airplane, boy, fly"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["birds are chirping and cooing", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a clock ticktocks"], "sample_ids": ["wfHeoPDLMaM", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["quacking, squawking, ducks", "ticktocks, clock, ticktocks"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": 
["ducks are quacking", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a jet engine screams, then increases its power"], "sample_ids": ["u21-Z5gJCB8", "vBslzh7saPw"], "start_seconds": ["30", "90"], "properties": ["background, voice, man", "power, scream, increase"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a jet engine roars and accelerates "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "water runs from a faucet while some men speak and the water runs in the sink"], "sample_ids": ["wz7N8YRy74I", "vzceMbklWc"], "start_seconds": ["30", "180"], "properties": ["rooster, crow, background, men", "water, faucet, sink"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", null], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "water is running and a man is speaking"], "question": "which entity has a sink?", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "a man speaks as bees buzz and birds chirp"], "sample_ids": ["zliInBdC98Y", "t25U-v4k4ts"], "start_seconds": ["30", "40"], "properties": ["a, baby, cries, wails", "bees buzz, birds chirp, man speaks"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "of a beekeeper working on a beehive in the woods"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking and bees are buzzing"], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a telephone rings followed by a woman talking"], "sample_ids": ["w2M4i1mklOA", 
"tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["loud, chime, bell", "ring, talk, woman"], "captions_pred_video": ["footage of an antique clock", null], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman speaks and dog vocalizes", "a telephone rings followed by a woman talking"], "sample_ids": ["uWAAAL4CIoc", "tGcFnX0GHI"], "start_seconds": ["0", "0"], "properties": ["a, dog, vocalize", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["sShpyu2l4YQ", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["growl, bark, yip", "animal, grunts, snorts"], "captions_pred_video": ["the puppies are playing with a toy", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a dog is barking and growling", "a woman is speaking and a baby is crying"], "question": "which entity is more snorts", "label": 1}, {"captions": ["long loud burping by a man", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xmiUIOhtZyQ", "tDVADusiIoc"], "start_seconds": ["60", "60"], "properties": ["loud, burp, man", "water, radio, man"], "captions_pred_video": ["homer simpson drinking a beer", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a person burps and music plays in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio?", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "a loud snarling engine is followed by a man 
laughing"], "sample_ids": ["w2JXXIAdUdg", "zl9Dqx-j7q4"], "start_seconds": ["10", "6"], "properties": ["emits, sleeping, person", "engine, laugh, loud"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sjlVMgdGSK0", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["car, revving, loudly", "airplane, boy, fly"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["smDKStoHBJo", "y8WEcpOlT3I"], "start_seconds": ["0", "40"], "properties": ["a, talk, baby, cry", "harsh, wind, blows"], "captions_pred_video": ["a man holding a crying baby in his arms", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking with wind noise in the background "], "question": "which entity has a harsh wind blowing?", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "soft movement is accompanied by clocks ticking in the background"], "sample_ids": ["zdYdyF9-m8U", "vlJS7LN2XyM"], "start_seconds": ["7", "30"], "properties": ["wind, crash, shoreline", "background, clocks, ticking"], "captions_pred_video": ["a person 
kayaking in the ocean near a cliff", "of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video"], "captions_pred_audio": ["waves crash and wind blows ", "a ticktock of a clock"], "question": "which entity is more calm", "label": 1}, {"captions": ["bees buzz as wind blows", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["tMJne1a4AFI", "xV7Mg1QucSc"], "start_seconds": ["0", "14"], "properties": ["bees, buzz, wind", "alarm, ticktocks, laughs"], "captions_pred_video": ["a swarm of bees on the ground", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a swarm of bees buzzing around", "an alarm clock ticks and a woman laughs"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a horn rings out as a machine runs by"], "sample_ids": ["wnpJndXuxLc", "slZLHwNbbt4"], "start_seconds": ["50", "300"], "properties": ["blows, vehicle, train", "a, horn, run"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a toilet flushes and water drains", "an airplane engine spools and people speak"], "sample_ids": ["sfAvvZwdLCY", "wTjoRj1se3U"], "start_seconds": ["20", "390"], "properties": ["water drains, flushes, water", "airplane, engine, spool"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a toilet is flushed", "a jet engine is running and people are talking"], "question": "which entity is a machine", "label": 1}, 
{"captions": ["a jet engine spools up and takes off", "several insects fly while two men talk"], "sample_ids": ["vBslzh7saPw", "s-T9OVOiMLo"], "start_seconds": ["90", "330"], "properties": ["engine, spools, takes", "several, fly, men"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a moving object", "label": 0}, {"captions": ["some tunes played by whistling", "someone whistles a tune"], "sample_ids": ["u6BnG6YZqJ4", "sIXTftIuUgw"], "start_seconds": ["0", "90"], "properties": ["tune, play, whistling", "someone, tune, whistle"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", null], "captions_pred_audio": ["a person whistling a song", "a person whistling a song"], "question": "which is a more active way to play a tune", "label": 0}, {"captions": ["a man talks while a clock does ticktock", "an engine runs loudly"], "sample_ids": ["spYNpeN7rPY", "vqZuVbG6-HI"], "start_seconds": ["1", "130"], "properties": ["a clock, ticktock, man", "loud, engine, run"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a lawn mower is running and men are speaking "], "question": "which entity is quieter", "label": 0}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wSVhSdj0F0", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["beep, clang, footsteps", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more like a movie", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "an insect buzzes around continuously"], "sample_ids": ["sQwlkXjQabo", "v25l1jef3JY"], "start_seconds": ["10", "0"], "properties": ["liquid, surface, spray", 
"buzzes, continuously, insect"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["spraying followed by silence", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["bees buzz as wind blows", "people cheer as a vehicle engine revs"], "sample_ids": ["tMJne1a4AFI", "xjhAnI2q6hM"], "start_seconds": ["0", "6"], "properties": ["bees, buzz, wind", "engine revs, vehicle, people"], "captions_pred_video": ["a swarm of bees on the ground", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a swarm of bees buzzing around", "a truck is revving its engine and a man is speaking "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "wind blows as people chatter quietly"], "sample_ids": ["zgUgkpk78xU", "xBxDz0CFVn0"], "start_seconds": ["70", "30"], "properties": ["clinking, humming, horn", "wind, chatter, people"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage is blurry and out of focus"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "wind blows as people chatter quietly"], "sample_ids": ["ziUT9IFTkjg", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["background, birds, rustling", "wind, chatter, 
people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "an infant crying as a woman laughs"], "sample_ids": ["s3cTDAj31g", "xhmRY9yhC7c"], "start_seconds": ["80", "20"], "properties": ["man, talk, woman", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks while playing a video game on a keyboard", "a horn rings out as a machine runs by"], "sample_ids": ["tw76HGONaKg", "slZLHwNbbt4"], "start_seconds": ["570", "300"], "properties": ["A, game, keyboard", "a, horn, run"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a train is moving and blowing its horn with a clickety-clack 
sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "water running down a sink while a man is talking"], "sample_ids": ["uWAAAL4CIoc", "vSeGhaZt-aI"], "start_seconds": ["0", "50"], "properties": ["a woman, chirps, animal", "water, sink, talk"], "captions_pred_video": [null, "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a woman and man are speaking", "water splashes as an animal walks through"], "sample_ids": ["vbpKkWvfOu4", "w1ir-sZ3Im8"], "start_seconds": ["560", "90"], "properties": ["two people, speaking, woman, man", "animal, water, splashes"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "water splashes and gurgles as people speak"], "question": "which entity is a video of an animal walking through water?", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "someone whistles a tune"], "sample_ids": ["wqZ135Ssz0", "sIXTftIuUgw"], "start_seconds": ["60", "90"], "properties": ["man, woman, squawks", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a male is speaking 
and a duck quacks as others laugh"], "sample_ids": ["sU53zg9Jp7s", "tiDFTC-5vU"], "start_seconds": ["380", "30"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "male, duck, laugh"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", null], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck?", "label": 1}, {"captions": ["rain falls onto a hard surface and thunder roars before music plays", "water flows and trickles"], "sample_ids": ["xNMovAf3o50", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["rain, thunder, music", "water, flow, trickle"], "captions_pred_video": ["tieng mua - the falling rain lynk lee", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["thunder and rain with music playing in the background ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "water pouring and bubbling"], "sample_ids": ["x6ijhqRY38s", "uyRfq-jKPpo"], "start_seconds": ["250", "50"], "properties": ["bowl, silverware, man", "water, bubbles, pouring"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a car speeding up in the distance"], "sample_ids": ["u21-Z5gJCB8", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["background, voice, man", "distance, car, speed"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "someone whistles a tune"], "sample_ids": ["wz7N8YRy74I", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["rooster, crow, background, people", "someone, tune, whistle"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", null], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["sa6TLVbooCc", "w34HjHr6gAY"], "start_seconds": ["240", "30"], "properties": ["people, laugh, child", "beeps, hit, woman"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 
the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a clock ticktocks briefly", "people speak as gunfire rings out"], "sample_ids": ["u7C-AEBQM", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["ticktocks, clock, ticktocks briefly", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "a toilet flushes and water sputters as it drains"], "sample_ids": ["xfaoyyzw2WU", "smGI3C1NZc"], "start_seconds": ["180", "30"], "properties": ["loud, jet engine, roar", "water, drain, toilet"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", null], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a toilet is flushed"], "question": "which entity is quieter", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "people speak as gunfire rings out"], "sample_ids": ["yI-KvObbDoY", "wqTCwqVRDlk"], "start_seconds": ["260", "80"], "properties": ["sound, smack, wind", "gunfire, ring, speak"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a man is speaking and a gun is fired"], "question": "which entity is more 
violent", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a wooden clack accompanies nearby chirping birds"], "sample_ids": ["yYEVLuqEytU", "yeFvk9x0wWI"], "start_seconds": ["40", "30"], "properties": ["animal, pig, background", "clack, bird, chirp"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["several sheep bleat and a man speaks", "birds chirp in the background as a car drives by "], "question": "which entity has a bird chirping in the background?", "label": 0}, {"captions": ["here comes the train and it starts to blow the horn and get close", "water pouring and bubbling"], "sample_ids": ["s7knHCFW82w", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["blow horn, get close, train", "water, bubbles, pouring"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as bees buzz and 
birds chirp", "a car speeding up in the distance"], "sample_ids": ["t25U-v4k4ts", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["bees buzz, birds chirp, man speaks", "distance, car, speed"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", null], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["an engine starts and increases in power", "an airplane flies overhead as a woman speaks"], "sample_ids": ["zjTG0gaGCUI", "zj2R0XoFr5k"], "start_seconds": ["80", "50"], "properties": ["power, increase, engine", "airplane, fly, overhead"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a woman speaks while a helicopter flies overhead "], "question": "which object is moving", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "waves crash against a shoreline and people speak"], "sample_ids": ["ujMt0-D-x2k", "yFB25fqfU8I"], "start_seconds": ["0", "300"], "properties": ["snoring, rhythmical, nearby", "wave, crash, shoreline"], "captions_pred_video": ["of the dog playing with a toy on the floor", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a frog croaks as other frogs croak in the background"], "sample_ids": ["sNB8zxXneIM", "yswmmRZFItk"], "start_seconds": ["20", "0"], "properties": ["several, quack, cocks", "background, frog, croak"], "captions_pred_video": ["a group of geese in a cage", "a close up of a frog in the water"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a frog is 
croaking"], "question": "which animal is more likely to be a frog", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "several insects fly while two men talk"], "sample_ids": ["wAAkbZToh8", "s-T9OVOiMLo"], "start_seconds": ["0", "330"], "properties": ["burp, laugh, speak", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man burps and a woman speaks", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about a person speaking and laughing?", "label": 0}, {"captions": ["water splashes and wind noise is made into a microphone", "water flows as men speak and yell"], "sample_ids": ["sDSppXIlJrs", "vJ7JPEFhyLA"], "start_seconds": ["27", "16"], "properties": ["microphone, water, wind", "water, flow, men"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "some men converse over an engine running"], "sample_ids": ["se87d6yxEOA", "sCiy7QS1U"], "start_seconds": ["10", "300"], "properties": ["run, whistle, pass", "men, converse, engine"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", null], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a steam engine?", "label": 0}, {"captions": ["a woman and man are speaking", "people applaud and hoot and chat quietly"], "sample_ids": ["vbpKkWvfOu4", "wwyfGO2J4"], "start_seconds": ["560", "90"], "properties": ["two people, speaking, woman, man", 
"people, applaud, hoot"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "people are clapping and speaking with background noise "], "question": "which entity shows more people", "label": 1}, {"captions": ["people speak softly as food sizzles", "people applaud and hoot and chat quietly"], "sample_ids": ["yhQ2Lg-7qDY", "wwyfGO2J4"], "start_seconds": ["130", "90"], "properties": ["food, sizzle, speak", "people, applaud, hoot"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["an engine starts and increases in power", "people speak as gunfire rings out"], "sample_ids": ["zjTG0gaGCUI", "wqTCwqVRDlk"], "start_seconds": ["80", "80"], "properties": ["power, increase, engine", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "pigeons vocalize and birds chirp"], "sample_ids": ["y2ZBGpgbhHM", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["animal, growl, bird", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a horn honks and 
then loudly blares", "a car revs and accelerates loudly and men and women chatter among themselves"], "sample_ids": ["wnpJndXuxLc", "y8dSeubCNI"], "start_seconds": ["50", "4"], "properties": ["horn, honk, loud", "men, women, car"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "an engine revving and people talking in the background"], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["a heavy rain falls endlessly", "a man speaks while water drains"], "sample_ids": ["wP8ZKrlx3oA", "vSeGhaZt-aI"], "start_seconds": ["40", "50"], "properties": ["heavy, rain, fall", "water, drain, man"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a man speaking while water drains?", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "goats bleat and people speak"], "sample_ids": ["uEU-Hg5MTN8", "z5iUE5h0EPs"], "start_seconds": ["27", "30"], "properties": ["a woman, laughs, animal", "goats bleat, people speak, language"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "of the goat in the barn"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a goat bleats and a man speaks"], "question": "which entity is a language", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "winds blows roughly as a vehicle races past"], "sample_ids": ["xKB8O8LTs6s", "xjvTpk2Zpr8"], "start_seconds": ["70", "70"], "properties": ["music, gunfire, explosion", "wind, blows, vehicle"], "captions_pred_video": ["in your own 
words a screenshot of the game's loading screen", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a jet engine roars and wind blows "], "question": "which entity is more calm", "label": 1}, {"captions": ["a motorcycle engine is idling", "a woman speaks happily and an animal chirps"], "sample_ids": ["vZAqdHZ81yA", "uWAAAL4CIoc"], "start_seconds": ["180", "0"], "properties": ["engine, motorcycle, idling", "a woman, chirps, animal"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", null], "captions_pred_audio": ["an engine is idling loudly", "a woman is speaking and a dog is barking "], "question": "which entity is not a motorcycle?", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a vehicle engine accelerating then running on idle"], "sample_ids": ["siJFXfGWgDk", "vYkA3cfXp5Q"], "start_seconds": ["50", "30"], "properties": ["a, bird, vehicle", "engine, accelerate, idle"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "an engine is idling"], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a train horn blows as it passes by"], "sample_ids": ["tOSWIURC-4", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["engine, work, nearby", "horn, blows, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a lawn mower is running ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vdoxuJn9lTc", "zj2R0XoFr5k"], "start_seconds": ["40", "50"], "properties": ["burp, loud, girl", "airplane, boy, fly"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a child speaks followed by a burp", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "water pouring and bubbling"], "sample_ids": ["wSVhSdj0F0", "uyRfq-jKPpo"], "start_seconds": ["10", "50"], "properties": ["horn honks, keys jingle, slam", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how 
to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "three men talk while wind blows and some liquid flows"], "sample_ids": ["uJV8NDaHqqk", "vJ7JPEFhyLA"], "start_seconds": ["100", "16"], "properties": ["loud, fly, chirp", "three men, wind, flow"], "captions_pred_video": ["a bee hive in a wooden box", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "birds chirp and an insect buzzes around"], "sample_ids": ["vZAw4apG0Es", "t97k0cejSQE"], "start_seconds": ["30", "250"], "properties": ["background, clock, ticktocks", "bird, chirp, insect"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a bee on a purple thistle flower"], "captions_pred_audio": ["a clock is ticking and people are talking", "a bee buzzes and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a beep occurs briefly", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["xtWeJ56-U-g", "ukg5L09Wpvo"], "start_seconds": ["20", "150"], "properties": ["beep, occur, briefly", "clickety-clack, train, whistle"], "captions_pred_video": ["how to create a simple program in python 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 8", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["mechanisms are ticking and a beep is heard ", "a train blows its whistle and blows its horn "], "question": "which is continuous", "label": 0}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "people talk quietly in the distance, followed by a police car siren wailing"], "sample_ids": ["uC9dtII1KDI", "wy1eKjR7KC0"], "start_seconds": ["150", "30"], "properties": ["wind, gusts, distance", "people, talk, distance"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "two police officers riding motorcycles down the street"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a man is speaking and a siren is going off"], "question": "which entity is more distant", "label": 1}, {"captions": ["electronic beeps occur in a short series", "a woman speaks as she rubs two objects together"], "sample_ids": ["y682ml90jGw", "vzxHnu-SFEw"], "start_seconds": ["11", "80"], "properties": ["beeps, series, electronic", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a beeping sound is being made ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "a woman speaks in a fast tone with a male"], "sample_ids": ["sYITalLZjj4", "sTpirNYo8vQ"], "start_seconds": ["30", "30"], "properties": ["water, rushes, background, birds", "a, tone, fast"], "captions_pred_video": ["two ducks are swimming in the water near each other", "of a man taking a selfie on a bus"], "captions_pred_audio": ["wind blows and birds chirp", "a man is speaking while a car is revving and accelerating "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a infant makes noise and is excited", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["wIJK3-5y0kA", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["noise, excited, infant", "men, talk, cars"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, 
{"captions": ["a person whistles a meandering tune", "pigeons vocalize and birds chirp"], "sample_ids": ["uFoga8sHpiw", "uiS58TNyUiw"], "start_seconds": ["90", "430"], "properties": ["person, tune, whistle", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a bird in a cage", "of the pigeon in the cage"], "captions_pred_audio": ["a person whistles a song", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "small dogs yip and bark sharply"], "sample_ids": ["w1mlz3Pe4fU", "v-wcQf4BDY0"], "start_seconds": ["300", "120"], "properties": ["vocalize, chirp, continuously", "bark, yip, sharply"], "captions_pred_video": ["of a bird in a cage", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["birds are chirping and singing", "a dog barks and growls"], "question": "which entity is more vocal", "label": 1}, {"captions": ["wind blows and people talk while livestock vocalizes", "a clock ticktocks"], "sample_ids": ["vXlk0lIQBFo", "v-g-j2uTByM"], "start_seconds": ["470", "30"], "properties": ["wind, talk, vocalize", "ticktocks, clock, ticktocks"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["someone whistles a song", "dogs bark as an engine runs and a person whistles"], "sample_ids": ["sIXTftIuUgw", "zY3icUyMdh8"], "start_seconds": ["90", "20"], "properties": ["someone, song, whistle", "dog, bark, engine"], "captions_pred_video": [null, "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a person whistling a song", "a car is driving and dogs are barking and squealing "], "question": "which entity is a person", 
"label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "a group of people chatter and talk as multiple horns honk in the background"], "sample_ids": ["vKrYfzleLB8", "yLy-WycbVVE"], "start_seconds": ["110", "30"], "properties": ["a, ring, gunshots", "background, people, talk"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "a soccer field in a stadium with yellow and red seats"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a man is speaking and a church bell is ringing with wind noise in the background "], "question": "which entity has a man yell?", "label": 0}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "a car accelerates and wind blows"], "sample_ids": ["weDbePuc-Xc", "u0TrcHhkPQ"], "start_seconds": ["40", "20"], "properties": ["cartoon character, music, vocalize", "accelerates, wind, blows"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", null], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a person snoring several times", "paper is crumpling consistently"], "sample_ids": ["spJCm8tD9Zo", "v5cSxLaHADY"], "start_seconds": ["90", "0"], "properties": ["snore, person, several", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a person is snoring loudly", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["children speak as a female ask them questions", "vehicles pass by on a roadway"], "sample_ids": ["wEBlkGWVWwE", "tgbONvsP47Y"], "start_seconds": ["260", "0"], "properties": 
["female, speak, questions", "pass, vehicle, roadway"], "captions_pred_video": ["shows a person writing on the whiteboard", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["someone whistles a tune", "an infant crying as a woman laughs"], "sample_ids": ["sIXTftIuUgw", "xhmRY9yhC7c"], "start_seconds": ["90", "20"], "properties": ["someone, tune, whistle", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a person whistling a song", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a jet engine spools up and takes off", "people cheer as a vehicle engine revs"], "sample_ids": ["vBslzh7saPw", "xjhAnI2q6hM"], "start_seconds": ["90", "6"], "properties": ["engine, spools, takes", "engine revs, vehicle, people"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man talks while vehicles pass by", "birds chirp and objects are moved around"], "sample_ids": ["sK4u5T8hW78", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["a, man, talk", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a stream of water flows quickly", "paper is crumpling consistently"], "sample_ids": ["wbHTKEJZyhc", "v5cSxLaHADY"], "start_seconds": ["20", "0"], "properties": ["stream, water, flow", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "water 
splashes as an animal walks through"], "sample_ids": ["vMf1dLD6Sng", "w1ir-sZ3Im8"], "start_seconds": ["6", "90"], "properties": ["frog, bird, vocalize", "animal, water, splashes"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a frog croaks loudly", "water splashes and gurgles as people speak"], "question": "which entity is not a frog?", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "an adult woman and an adult man speak"], "sample_ids": ["y8WEcpOlT3I", "zTLVJCo4WEE"], "start_seconds": ["40", "30"], "properties": ["wind, speak, buffeting", "two people, adult, speak"], "captions_pred_video": ["on how to use a sewing machine youtube", "- a boy with a rifle aiming at a target"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman speaks and crickets chirp"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a stream of water flows as people talk and wind blows"], "sample_ids": ["zofjfKhqLk8", "xBxDz0CFVn0"], "start_seconds": ["10", "30"], "properties": ["background, metal, clings", "stream, water, flow"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage is blurry and out of focus"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "some tunes played by whistling"], "sample_ids": ["y2ZBGpgbhHM", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["dog, chirp, breathe", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["birds chirping and a dog panting", "a person 
whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "people speak as gunfire rings out"], "sample_ids": ["y1saVTXsKwc", "wqTCwqVRDlk"], "start_seconds": ["80", "80"], "properties": ["a, dog, talk", "gunfire, ring, speak"], "captions_pred_video": ["a dog playing with a pink ball", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a dog barks and a man speaks", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tDlysoZiA1I", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["animal, grunt, chirp", "a woman, something, fried"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a child speaks in closed space"], "sample_ids": ["w5W5Kqtc8E", "yW6FWLSLkx4"], "start_seconds": ["100", "40"], "properties": ["water, splashes, motorboat", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "water splashes and a door squeaks"], "sample_ids": ["vZAw4apG0Es", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["background, tick, repeat", "sound, splash, door"], 
"captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a dog barks and taps with background noise "], "question": "which entity has a door?", "label": 1}, {"captions": ["long loud burping by a man", "birds chirp and objects are moved around"], "sample_ids": ["xmiUIOhtZyQ", "yPUYU6t3rwo"], "start_seconds": ["60", "370"], "properties": ["loud, burp, man", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["homer simpson drinking a beer", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a person burps and music plays in the background ", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wvKpEYswXO0", "yajyRTUQk3U"], "start_seconds": ["150", "400"], "properties": ["plastic, tap, speak", "a woman, something, fried"], "captions_pred_video": ["of the person preparing food in the kitchen", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a woman is speaking while food is frying in the background"], "question": "what is being tapped on in the first picture?", "label": 0}, {"captions": ["a man speaks as a vehicle engine idles", "a child speaks in closed space"], "sample_ids": ["shmR4OZtzqA", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["man, engine, idle", "child, space, speak"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 
2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man speaks while a motor runs", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a horn beeps twice followed by a clang and then some footsteps and another beep", "a stream of water flows quickly"], "sample_ids": ["wSVhSdj0F0", "wbHTKEJZyhc"], "start_seconds": ["10", "20"], "properties": ["beep, clang, footsteps", "stream, water, flow"], "captions_pred_video": [null, "footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a 
bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a waterfall is flowing and people are speaking "], "question": "which entity is moving", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a motor idles, accelerates, then slows down."], "sample_ids": ["tQWGZLItBXk", "vYkA3cfXp5Q"], "start_seconds": ["170", "30"], "properties": ["voice, music, whoosh", "speed, idle, accelerate"], "captions_pred_video": ["worms revolution screenshots", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "an engine is idling"], "question": "which entity is more like a machine", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "an insect buzzes around continuously"], "sample_ids": ["xfaoyyzw2WU", "v25l1jef3JY"], "start_seconds": ["180", "0"], "properties": ["loud, jet engine, roar", "buzzes, continuously, insect"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a fly is buzzing around a microphone "], "question": "which entity is quieter", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "a stream of water runs briefly"], "sample_ids": ["wqZ135Ssz0", "x-PeY8Yb8M4"], "start_seconds": ["60", "300"], "properties": ["two men, woman, birds", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a man 
speaks as crickets sing", "a train horn blows as it passes by"], "sample_ids": ["ryFDPxgDOGc", "zVacuqSb4LI"], "start_seconds": ["570", "30"], "properties": ["a, crickets, sing", "horn, blows, train"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is louder", "label": 0}, {"captions": ["people converse in the distance as a clock ticks", "vehicles pass by on a roadway"], "sample_ids": ["vZAw4apG0Es", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["people, clock, converse", "pass, vehicle, roadway"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a clock is ticking and people are talking", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks over intermittent keyboard taps", "a man speaks over intermittent keyboard taps"], "sample_ids": ["tw76HGONaKg", "tw76HGONaKg"], "start_seconds": ["570", "570"], "properties": ["audio, man, 
keyboard", "audio, man, keyboard"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man speaks and types on a computer keyboard "], "question": "which entity is a video", "label": 1}, {"captions": ["a small engine idles continuously", "a gun shoots, followed by water sloshing nearby"], "sample_ids": 
["y5WII6cTH7k", "wDVMhEdTiVw"], "start_seconds": ["40", "30"], "properties": ["engine, idle, continuously", "gun, shoot, water"], "captions_pred_video": ["footage of a sewing machine stitching a red and white hat", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["an engine is knocking and vibrating ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not a gun?", "label": 0}, {"captions": ["birds twitter and chirp and clatter", "a man speaks while turning a water faucet on"], "sample_ids": ["yeFvk9x0wWI", "vf9xf3vMsGM"], "start_seconds": ["30", "540"], "properties": ["chirp, twitter, clatter", "A man speaks while turning a water faucet on."], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "of the person washing their hands under the faucet"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking while water is running in the background"], "question": "which entity is silent", "label": 1}, {"captions": ["a train engine runs and a horn blows", "water flows and trickles"], "sample_ids": ["zPX9o1uDiI", "tB7hWb9gTuQ"], "start_seconds": ["40", "30"], "properties": ["engine, horn, run", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "a toilet flushes and a female speaks"], "sample_ids": ["xvDdE3zNf8Y", "yaln9y8I7ms"], "start_seconds": ["120", "230"], "properties": ["A, crumple, paper", "female, flushes, toilet"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman speaks and crumples paper", "a 
toilet flushes and a man speaks"], "question": "which woman is speaking", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["zofjfKhqLk8", "ukg5L09Wpvo"], "start_seconds": ["10", "150"], "properties": ["background, metal, clings", "clickety-clack, train, whistle"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "wind blowing followed by a zoom"], "sample_ids": ["yJ0TePmaOo", "vr8ZXjEBhMQ"], "start_seconds": ["390", "150"], "properties": ["two hard objects, man, speak", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "wind blows as people chatter quietly"], "sample_ids": ["slZLHwNbbt4", "xBxDz0CFVn0"], "start_seconds": ["300", "30"], "properties": ["clap, distance, horn", "wind, chatter, people"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage is blurry and out of focus"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "some tunes played by whistling"], "sample_ids": ["yVumC9TGknc", 
"u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["humming, clock, birds", "tune, play, whistling"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a series of beeps and chirps", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a man speaks as a machine runs", "dishes cling together then a man begins to speak"], "sample_ids": ["vD6lYD1l0BY", "sQGXqGcwOTc"], "start_seconds": ["330", "3"], "properties": ["a, machine, run", "cling, speak, dishes"], "captions_pred_video": ["game controller being held in the hands of the person", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking as a machine runs?", "label": 0}, {"captions": ["a man speaks as a motor runs in the background", "an engine sputters followed by a car zooming by"], "sample_ids": ["xZepNM9qcRA", "u5RmF3c3Aw"], "start_seconds": ["30", "60"], "properties": ["background, motor, run", "engine, car, zoom"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a race car accelerates and skids with wind noise in the background "], "question": "which entity has a car zooming by?", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["y2ZBGpgbhHM", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["animal, growl, bird", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["birds chirping and a dog 
panting", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "an engine runs loudly"], "sample_ids": ["sDSppXIlJrs", "vqZuVbG6-HI"], "start_seconds": ["27", "130"], "properties": ["microphone, water, wind", "loud, engine, run"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "footage is blurry because it's raining outside"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a loud engine muffles a man as he speaks"], "sample_ids": ["tOSWIURC-4", "xyx6eNVEYRY"], "start_seconds": ["0", "380"], "properties": ["engine, work, nearby", "loud, engine, muffles"], "captions_pred_video": [null, "footage of a helicopter landing on a runway at an airport"], "captions_pred_audio": ["a lawn mower is running ", "an aircraft engine is running and a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vSeGhaZt-aI", "wz7N8YRy74I"], "start_seconds": ["50", "30"], "properties": ["water, bubbles, speak", "rooster, crow, background, men"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a propeller rotates loudly and intensely"], "sample_ids": ["w8uLijTqtlU", 
"ugHJF0hfYkg"], "start_seconds": ["70", "10"], "properties": ["wind, microphone, noise", "loud, intense, propeller"], "captions_pred_video": ["footage is blurry and shaky", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["the wind is blowing strongly", "a helicopter is flying overhead "], "question": "which is louder", "label": 0}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sLUnaPT5gM8", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["loud, laughter, intermittent", "airplane, boy, fly"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a clock ticktocks", "a machine engine runs and a man speaks"], "sample_ids": ["v-g-j2uTByM", "vs65y4qmyBE"], "start_seconds": ["30", "340"], "properties": ["ticktocks, clock, ticktocks", "engine, run, man"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "a car is engulfed in flames on the side of the road"], "captions_pred_audio": ["a clock is ticking loudly", "a heavy engine is running and men are speaking "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a man is snoring loudly and repeatedly", "a woman speaks over sizzling noise"], "sample_ids": ["sncRqQ67iJU", "yajyRTUQk3U"], "start_seconds": ["460", "400"], "properties": ["loud, repeatedly, man", "noise, woman, speak"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person is snoring", "a woman is speaking while food is frying in the 
background"], "question": "which entity is speaking over noise", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "distant men speak as a spray can nozzle is depressed"], "sample_ids": ["su6FAOcOA8c", "rwtmaKiCcQU"], "start_seconds": ["4", "30"], "properties": ["engine, idle, woman", "nozzle, depressed, spray can"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "shows a man spraying paint on a wall with a spray gun"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "spraying and people speaking"], "question": "which entity is about a spray can?", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["tQWGZLItBXk", "vlS6YMeWAPo"], "start_seconds": ["170", "40"], "properties": ["voice, music, whoosh", "sheep, baa, birds"], "captions_pred_video": ["worms revolution screenshots", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a goat bleats and birds chirp"], "question": "which entity is more animal", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["vddP56-ogds", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["water, flow, laugh", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a child yells and another yells", "a woman and man speak while food is frying"], "sample_ids": 
["vMDHu7Lxcgw", "zk-xJGQU8-4"], "start_seconds": ["410", "130"], "properties": ["two, yell, child", "food, man, woman"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a woman is speaking while dishes are clanging and music is playing in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a woman speaks with water running", "people speak as gunfire rings out"], "sample_ids": ["wTideSjRFS0", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["water, running, woman", "gunfire, ring, speak"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking and a gun is fired"], "question": "which entity is more calm", "label": 0}, {"captions": ["a car accelerates and wind blows", "a propeller rotates loudly and intensely"], "sample_ids": ["u0TrcHhkPQ", "ugHJF0hfYkg"], "start_seconds": ["20", "10"], "properties": ["accelerates, wind, blows", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a helicopter is flying overhead "], "question": "which entity is rotating", "label": 1}, {"captions": ["a small engine spits as it runs", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["sZvwOuuPGP0", "vbZ-0lGPneg"], "start_seconds": ["50", "30"], "properties": ["spits, engine, runs", "a woman, a television program, a bird"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a medium engine is 
running ", "a woman is speaking and a dog is whimpering"], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "a woman speaks and other women and a man talk with her"], "sample_ids": ["vYkA3cfXp5Q", "vbpKkWvfOu4"], "start_seconds": ["30", "560"], "properties": ["engine, accelerate, idle", "a, woman, man"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["an engine is idling", "a woman is speaking and a man is speaking"], "question": "which entity is a group of people", "label": 1}, {"captions": ["a large crowd cheers and applauds", "small dogs yip and bark sharply"], "sample_ids": ["rqfQRErjfk8", "v-wcQf4BDY0"], "start_seconds": ["170", "120"], "properties": ["crowd, cheers, applauds", "bark, yip, sharply"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a dog barks and growls"], "question": "which entity is more likely to be a group of people", "label": 0}, {"captions": ["an electronic device bleeps once", "multiple people speak and children yell while water gurgles"], "sample_ids": ["tHJ6JSa8Y4", "vb1fPSDI4c"], "start_seconds": ["0", "30"], "properties": ["bleeps, electronic, device", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["a clock is ticking and beeping", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["the rumbling of a bus followed by a soft male voice", "a woman talks while something is fried and 
objects are tapped"], "sample_ids": ["vK93VuO0yNc", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["male voice, bus, rumble", "a woman, something, fried"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is about a woman talking?", "label": 1}, {"captions": ["a man speaks as crickets sing", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["ryFDPxgDOGc", "y8WEcpOlT3I"], "start_seconds": ["570", "40"], "properties": ["a, crickets, sing", "harsh, wind, blows"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking with wind noise in the background "], "question": "which entity is a man speaking as crickets sing?", "label": 0}, {"captions": ["a helicopter engine runs", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["t5ZbXbniOWk", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["engine, helicopter, run", "engine, laugh, loud"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a helicopter is flying overhead ", "a jet engine roars "], "question": "which entity is not a helicopter?", "label": 1}, {"captions": ["water runs into a sink while men speak", "a telephone rings followed by a woman talking"], "sample_ids": ["vzceMbklWc", "tGcFnX0GHI"], "start_seconds": ["180", "0"], "properties": ["water, sink, run", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is 
running and a man is speaking", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "people applaud and hoot and chat quietly"], "sample_ids": ["vh30P49Po6s", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["loud, continuous, quacks", "people, applaud, hoot"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "people are clapping and speaking with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a motor runs steadily as a man speaks, then the motor revs twice", "water flows and trickles"], "sample_ids": ["ylpYOorfH4o", "tB7hWb9gTuQ"], "start_seconds": ["410", "30"], "properties": ["motor, run, steady", "water, flow, trickle"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and an engine is revving", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["u21-Z5gJCB8", "uYT5gxnyMWM"], "start_seconds": 
["30", "50"], "properties": ["background, voice, man", "a, scream, girl"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a man talks while metallic objects are rapped and steam is released"], "sample_ids": ["ukxt9I7eMMg", "vuUVPzd2FXw"], "start_seconds": ["30", "160"], "properties": ["continuous, woman, speaking", "a, steam, release"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of the person cooking on the grill with a spatula"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking and dishes are clanging"], "question": "which entity has a woman speaking towards the end?", "label": 0}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a car accelerates and wind blows"], "sample_ids": ["ziUT9IFTkjg", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["background, birds, rustling", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a race car accelerates and revs its engine "], "question": "which entity is a car?", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["yks4cLgIDMc", "ziUT9IFTkjg"], "start_seconds": ["170", "10"], "properties": ["background, speaking, child", "background, birds, rustling"], "captions_pred_video": ["footage of two kids wrestling on the floor", null], "captions_pred_audio": ["a man is speaking and a child is crying", 
"birds are chirping and a chime is ringing "], "question": "which entity has birds in the background?", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["xOZfdgAgJ9o", "tdWhHV3X25Q"], "start_seconds": ["40", "60"], "properties": ["woman, whimpering, speaking", "applause, audience, yells"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["paper folding and crinkling", "wind blows as people chatter quietly"], "sample_ids": ["zPpG3RD8lSs", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["paper, fold, crinkle", "wind, chatter, people"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "footage is blurry and out of focus"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a man is speaking with wind noise in the background "], "question": 
"which entity is more quiet", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sU53zg9Jp7s", "su6FAOcOA8c"], "start_seconds": ["380", "4"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "engine, idle, woman"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a woman is speaking and a subway train is moving "], "question": "which entity is a recording of a person speaking?", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["yeFvk9x0wWI", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["chirp, twitter, clatter", "male, duck, laugh"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", null], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking and ducks are quacking"], "question": "which entity is a human speaking?", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "winds blows roughly as a vehicle races past"], "sample_ids": ["uYT5gxnyMWM", "xjvTpk2Zpr8"], "start_seconds": ["50", "70"], "properties": ["person, spray, yell", "wind, blows, vehicle"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a jet engine roars and wind blows "], "question": "which entity is more likely to be a person", "label": 0}, {"captions": ["people speak and laugh as a child speaks", "a person whistles a meandering tune"], "sample_ids": ["sa6TLVbooCc", 
"uFoga8sHpiw"], "start_seconds": ["240", "90"], "properties": ["people, laugh, child", "person, tune, whistle"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage of a bird in a cage"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a person whistles a song"], "question": "which entity is a person?", "label": 1}, {"captions": ["continuous sneezing together with speech", "a clock ticks quietly and rhythmically"], "sample_ids": ["x4dZyf9Gbj0", "u7C-AEBQM"], "start_seconds": ["130", "30"], "properties": ["continuous, sneeze, speech", "ticks, rhythmic, quiet"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a woman sneezes and speaks", "a ticktock of a clock"], "question": "which entity is quieter", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a group of people chatter and talk as multiple horns honk in the background"], "sample_ids": ["wnpJndXuxLc", "yLy-WycbVVE"], "start_seconds": ["50", "30"], "properties": ["blows, vehicle, train", "background, people, talk"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a soccer field in a stadium with yellow and red seats"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a man is speaking and a church bell is ringing with wind noise in the background "], "question": "which entity is talking", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "plastic is tapped on while someone speaks"], "sample_ids": ["yJ0TePmaOo", "wvKpEYswXO0"], "start_seconds": ["390", "150"], "properties": ["two hard objects, man, speak", "plastic, tap, speak"], "captions_pred_video": [null, "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a woman is speaking and tapping with 
background noise and water running "], "question": "which object is being rubbed together", "label": 0}, {"captions": ["a man speaks while turning a water faucet on", "an engine runs loudly"], "sample_ids": ["vf9xf3vMsGM", "vqZuVbG6-HI"], "start_seconds": ["540", "130"], "properties": ["A man speaks while turning a water faucet on.", "loud, engine, run"], "captions_pred_video": ["of the person washing their hands under the faucet", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a lawn mower is running and men are speaking "], "question": "which entity is quieter", "label": 0}, {"captions": ["a door slams shut and an object moves on a hard surface", "an infant crying as a woman laughs"], "sample_ids": ["zkKdxzNC97Y", "xhmRY9yhC7c"], "start_seconds": ["27", "20"], "properties": ["hard, surface, door", "a, laugh, infant"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a door is opened and closed", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["birds fly and flutter around", "people applaud and hoot and chat quietly"], "sample_ids": ["wGKgwOP3h30", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["fly, flutter, around", "people, applaud, hoot"], "captions_pred_video": ["of the pigeons in the coop", null], "captions_pred_audio": ["pigeons coo and flap their wings", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 0}, {"captions": ["a propeller moves loudly nearby", "a duck quacks loudly and continuously"], "sample_ids": ["ugHJF0hfYkg", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["loud, propeller, move", "loud, continuous, quacks"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "of a man brushing his teeth 
with a toothbrush in his mouth"], "captions_pred_audio": ["a helicopter is flying overhead ", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["uYT5gxnyMWM", "wqZ135Ssz0"], "start_seconds": ["50", "60"], "properties": ["person, spray, yell", "two men, woman, birds"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yswmmRZFItk", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["background, frog, croak", "a woman, laughs, animal"], "captions_pred_video": ["a close up of a frog in the water", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a frog is croaking", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["wind noise makes sound into a microphone", "a car speeding up in the distance"], "sample_ids": ["w8uLijTqtlU", "u0TrcHhkPQ"], "start_seconds": ["70", "20"], "properties": ["wind, microphone, noise", "distance, car, speed"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the wind is blowing strongly", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a clock ticktocks continuously", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vlJS7LN2XyM", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["ticktocks, clock, ticktocks continuously", "engine, revs, vehicle"], 
"captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a ticktock of a clock", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a small engine spits as it runs", "an airplane engine runs"], "sample_ids": ["sZvwOuuPGP0", "yVPZ2MNWpms"], "start_seconds": ["50", "0"], "properties": ["spits, engine, runs", "engine, airplane, runs"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a medium engine is running ", "a car is driving by on the road "], "question": "which entity is a machine", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "a toilet flushes and a female speaks"], "sample_ids": ["sd7xVssqlw", "yaln9y8I7ms"], "start_seconds": ["50", "230"], "properties": ["accelerates, tires, squealing", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a toilet flushes and a man speaks"], "question": "which entity is a toilet?", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["vddP56-ogds", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["water, flow, laugh", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["cats meow and then a 
person begins to talk while the cats continue to meow", "some tunes played by whistling"], "sample_ids": ["x5cuQjOdM3E", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["cat, talk, meow", "tune, play, whistling"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a cat meows and a woman speaks", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a train horn blows as it passes by"], "sample_ids": ["s4Uz1Ffgo04", "zVacuqSb4LI"], "start_seconds": ["100", "30"], "properties": ["roars, background, people speaking", "horn, blows, train"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "water pouring and bubbling"], "sample_ids": 
["sofxkNWaP0s", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["wind, engine, louder", "water, bubbles, pouring"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an animal quacks rapidly", "wind blowing followed by a zoom"], "sample_ids": ["vh30P49Po6s", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["animal, quacks, rapidly", "wind, blow, zoom"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a duck is quacking loudly", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a zoom?", "label": 0}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["wRBHTgrbiwg", "y2bVZ7rz-5M"], "start_seconds": ["50", "280"], "properties": ["bird, owl, speak", "motor noise, horn, siren"], "captions_pred_video": ["of a bee pollinating the 
flowers in the field", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a truck is honking its horn and a siren is blaring "], "question": "which entity is more likely to be heard in a car", "label": 1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "wind blows as people chatter quietly"], "sample_ids": ["uOpoD0gGXcs", "xBxDz0CFVn0"], "start_seconds": ["120", "30"], "properties": ["chirps, woman, bird", "wind, chatter, people"], "captions_pred_video": ["a herd of cows grazing in the field", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["tjmoSi330GM", "tDVADusiIoc"], "start_seconds": ["23", "60"], "properties": ["speed, water, boat", "water, radio, man"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which is not a speedboat", "label": 1}, {"captions": ["a motorcycle engine is idling", "a infant makes noise and is excited"], "sample_ids": ["vZAqdHZ81yA", "wIJK3-5y0kA"], "start_seconds": ["180", "30"], "properties": ["engine, motorcycle, idling", "noise, excited, infant"], "captions_pred_video": ["a motorcycle is parked on the side of the road with its rear end facing the viewer", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["an engine is idling loudly", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["a 
large crowd cheers and applauds", "an airplane engine runs"], "sample_ids": ["rqfQRErjfk8", "yVPZ2MNWpms"], "start_seconds": ["170", "0"], "properties": ["crowd, cheers, applauds", "engine, airplane, runs"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a car is driving by on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a dog barks and whimpers", "small dogs yip and bark sharply"], "sample_ids": ["sShpyu2l4YQ", "v-wcQf4BDY0"], "start_seconds": ["0", "120"], "properties": ["barks, whimpers, dog", "bark, yip, sharply"], "captions_pred_video": ["the puppies are playing with a toy", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a dog is barking and growling", "a dog barks and growls"], "question": "which dog is more playful", "label": 1}, {"captions": ["a duck quacks continuously", "some tunes played by whistling"], "sample_ids": ["vh30P49Po6s", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["quacks, continuously, duck", "tune, play, whistling"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a duck is quacking loudly", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "some liquid flows while a woman laughs and man talks"], "sample_ids": ["sHbXC6na9hg", "vddP56-ogds"], "start_seconds": ["0", "30"], "properties": ["a person, saw, wood", "liquid, laughs, man"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", null], "captions_pred_audio": ["an engine is idling and vibrating", "water is running and gurgling and a man is speaking"], "question": "which 
entity is about a person cutting wood?", "label": 0}, {"captions": ["a woman speaks and a baby laughs", "a man speaks as a car is passing by"], "sample_ids": ["tOj4tdLRaA", "sK4u5T8hW78"], "start_seconds": ["70", "30"], "properties": ["woman, laugh, baby", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a baby?", "label": 0}, {"captions": ["a man makes an exclamation, then another man speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xO-Q2BlIIPU", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["two men, exclamation, speak", "engine, laugh, loud"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a horn blasts as warning bells ring"], "sample_ids": ["zofjfKhqLk8", "zgUgkpk78xU"], "start_seconds": ["10", "70"], "properties": ["background, metal, clank", "horn, bells, ring"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 
1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a train blows its horn as it speeds down the tracks "], "question": "which entity is a warning device", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "a girl talking, laughing and sneezing noise"], "sample_ids": ["yZrFNS7GFBQ", "y4tPJXBKDig"], "start_seconds": ["30", "20"], "properties": ["pigeon, buzzes, insect", "a, noise, talk"], "captions_pred_video": ["of the bird in the cage", "footage of the woman wiping her nose with a tissue"], "captions_pred_audio": ["an owl hoots in the background ", "a woman is speaking and coughing with background noise and breathing "], "question": "which entity is talking", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "a stream of water flows as people talk and wind blows"], "sample_ids": ["t69a8aRKhmc", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["a, b, c", "stream, water, flow"], "captions_pred_video": ["footage is blurry and out of focus", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp and wind blows", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sxIvBMSavMQ", "y8WEcpOlT3I"], "start_seconds": ["210", "40"], "properties": ["birds, chirp, wind", "harsh, wind, blows"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract 
honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "on how to use a sewing machine youtube"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 0}, {"captions": ["a man speaks over intermittent keyboard taps", "a person uses a saw to cut some wood"], "sample_ids": ["tw76HGONaKg", "sHbXC6na9hg"], "start_seconds": ["570", "0"], "properties": ["audio, man, keyboard", "a person, saw, wood"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "an engine is idling and vibrating"], "question": "which entity is a video of a 
person cutting wood?", "label": 1}, {"captions": ["a man talks as several small engines run", "a stream of water runs briefly"], "sample_ids": ["u9A6VZQCZpU", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["a, man, talk", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "an small aircraft engine runs and a boy speaks"], "sample_ids": ["xjhAnI2q6hM", "xSKJGCItUWE"], "start_seconds": ["6", "10"], "properties": ["engine revs, vehicle, people", "engine, run, boy"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a high pitched engine is running and a child speaks"], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vr8ZXjEBhMQ", "yDoT73BWsdA"], "start_seconds": ["150", "10"], "properties": ["wind, blow, zoom", "engine, revs, vehicle"], "captions_pred_video": ["is taken from a motorcycle's point of view", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a race car accelerates and revs its engine "], "question": "which entity is not a zoom", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "some men converse over an engine running"], "sample_ids": ["yZrFNS7GFBQ", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["pigeon, buzzes, insect", "men, converse, engine"], "captions_pred_video": ["of the bird in the cage", null], 
"captions_pred_audio": ["an owl hoots in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a goat bleats as a person speaks", "an infant crying as a woman laughs"], "sample_ids": ["tPJvjq9QePY", "xhmRY9yhC7c"], "start_seconds": ["40", "20"], "properties": ["bleats, person, speak", "a, laugh, infant"], "captions_pred_video": ["a dog and a sheep in a barn", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a baby cries and a man speaks", "a baby cries and a woman speaks"], "question": "which entity is crying", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "people cheer as a vehicle engine revs"], "sample_ids": ["sQGXqGcwOTc", "xjhAnI2q6hM"], "start_seconds": ["3", "6"], "properties": ["cling, speak, dishes", "engine revs, vehicle, people"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["u21-Z5gJCB8", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["background, voice, man", "harsh, wind, blows"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with wind noise in the background "], "question": "which entity has a harsh wind blowing?", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["uZesmtKZGSw", 
"w34HjHr6gAY"], "start_seconds": ["250", "30"], "properties": ["car, track, man", "beeps, hit, woman"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a beep sounds followed by a child speaking"], "question": "which entity has a man talking?", "label": 0}, {"captions": ["a male speaks and another male speaks", "pigeons vocalize and birds chirp"], "sample_ids": ["viuTg1M-dqg", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["two males, speaking, male", "vocalize, bird, chirp"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["an engine runs and a man speaks", "people speak as gunfire 
rings out"], "sample_ids": ["yT5WfYMRr-U", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["engine, run, man", "gunfire, ring, speak"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war", "label": 1}, {"captions": ["ticking continues without interruption", "a man speaks as a boat engine runs"], "sample_ids": ["v-g-j2uTByM", "wtDqrBygTcU"], "start_seconds": ["30", "30"], "properties": ["ticking, continuous, clock", "man, engine, run"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "shows a person riding on the back of a boat as it speeds through the water"], "captions_pred_audio": ["a clock is ticking loudly", "a man is speaking and a motor is running"], "question": "which is not a clock", "label": 1}, {"captions": ["children speak as a female ask them questions", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["wEBlkGWVWwE", "uYT5gxnyMWM"], "start_seconds": ["260", "50"], "properties": ["female, speak, questions", "a, scream, girl"], "captions_pred_video": ["shows a person writing on the whiteboard", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a woman is speaking and a baby is crying"], "question": "which entity has a girl speaking followed by a scream?", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wvKpEYswXO0", "uYT5gxnyMWM"], "start_seconds": ["150", "50"], "properties": ["water, tap, run", "female, spraying, scream"], "captions_pred_video": ["of the person preparing food in the 
kitchen", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person spraying and screaming?", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "a child speaks in closed space"], "sample_ids": ["w5W5Kqtc8E", "yW6FWLSLkx4"], "start_seconds": ["100", "40"], "properties": ["wind, blow, vehicle", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a person is snoring while sleeping", "a frog croaks as other frogs croak in the background"], "sample_ids": ["vJrjSeP17yE", "yswmmRZFItk"], "start_seconds": ["40", "0"], "properties": ["a person is sleeping, snoring, person", "background, frog, croak"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a close up of a frog in the water"], "captions_pred_audio": ["a person snoring loudly", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "someone snores nearby"], "sample_ids": ["ujMt0-D-x2k", "spJCm8tD9Zo"], "start_seconds": ["0", "90"], "properties": ["snoring, rhythmical, nearby", "someone snores, nearby, someone"], "captions_pred_video": ["of the dog playing with a toy on the floor", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a person is snoring loudly", "a person is snoring loudly"], "question": "which entity is more likely to be a person", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "some men converse 
over an engine running"], "sample_ids": ["y8WEcpOlT3I", "sCiy7QS1U"], "start_seconds": ["40", "300"], "properties": ["harsh, wind, blows", "men, converse, engine"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["children cheer as a man speaks then an audience screams", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["vJvryTwuAV8", "wDVMhEdTiVw"], "start_seconds": ["16", "30"], "properties": ["audience, cheer, man", "gun, shoot, water"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "water splashes and a door squeaks"], "sample_ids": ["s4Uz1Ffgo04", "sdXV-ylviw"], "start_seconds": ["100", "190"], "properties": ["roars, background, people speaking", "sound, splash, door"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a dog barks and taps with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["someone whistles a song", "a drill drills through something then people begin laughing"], "sample_ids": ["sIXTftIuUgw", "tEE3MpBt1sg"], "start_seconds": ["90", "50"], "properties": ["someone, song, whistle", "drill, something, laugh"], "captions_pred_video": [null, "footage is blurry due to the smoke in the air"], 
"captions_pred_audio": ["a person whistling a song", "people are laughing breathing and speaking with background noise "], "question": "which entity is a drill?", "label": 1}, {"captions": ["a door slams shut roughly", "a man speaks as a car is passing by"], "sample_ids": ["zkKdxzNC97Y", "sK4u5T8hW78"], "start_seconds": ["27", "30"], "properties": ["a door, slams, shut", "a, car, pass"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a moving object", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "paper folding and crinkling"], "sample_ids": ["s7knHCFW82w", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["blow horn, get close, train", "paper, fold, crinkle"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how 
to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "the wind blows and a mouse clicks "], "question": "which is not a train", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "water splashes as an animal walks through"], "sample_ids": ["w-4gHptFNuU", "w1ir-sZ3Im8"], "start_seconds": ["21", "90"], "properties": ["engine revs, accelerates, bump", "animal, water, splashes"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a car accelerates and revs its engine ", "water splashes and gurgles as people speak"], "question": "which entity is more likely to cause a splash", "label": 1}, {"captions": ["birds chirp and wind blows", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sxIvBMSavMQ", "zl9Dqx-j7q4"], "start_seconds": ["210", "6"], "properties": ["birds, chirp, wind", "engine, laugh, loud"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive 
beekeeping 101 how to extract honey from a", "footage of a man driving a car in the dark"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a telephone rings and a bird vocalizes", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["skd2PphS6oI", "sLUnaPT5gM8"], "start_seconds": ["190", "0"], "properties": ["ring, bird, vocalize", "loud, laughter, intermittent"], "captions_pred_video": ["a vintage telephone on a towel with a pair of scissors and a pair of pliers next to it", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a telephone bell rings repeatedly ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["paper is repeatedly crumpled and crinkled", "wind blowing followed by a zoom"], "sample_ids": ["vms5XGTDVQc", "vr8ZXjEBhMQ"], "start_seconds": ["220", "150"], "properties": ["paper, crumpled, crinkled", "wind, blow, zoom"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["paper is crumpled and crinkled", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "people applaud and hoot and chat quietly"], "sample_ids": ["uiS58TNyUiw", "wwyfGO2J4"], "start_seconds": ["430", "90"], "properties": ["vocalize, bird, chirp", "people, applaud, hoot"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be a symphony", "label": 1}, {"captions": ["a baby laugh at a sputter", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["sLUnaPT5gM8", 
"yDoT73BWsdA"], "start_seconds": ["0", "10"], "properties": ["laugh, sputter, baby", "engine, revs, vehicle"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "wind blows as people chatter quietly"], "sample_ids": ["wtDqrBygTcU", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["man, engine, run", "wind, chatter, people"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a motor is running", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["zY3icUyMdh8", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["dog, bark, engine", "a, scream, girl"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a toilet flushes and water drains", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sfAvvZwdLCY", "tdWhHV3X25Q"], "start_seconds": ["20", "60"], "properties": ["water drains, flushes, water", "applause, audience, yells"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man is talking to another man on a stage in 
front of a microphone"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["here comes the train and it starts to blow the horn and get close", "a man speaks as a vehicle engine idles"], "sample_ids": ["s7knHCFW82w", "shmR4OZtzqA"], "start_seconds": ["30", "30"], "properties": ["blow horn, get close, train", "man, engine, idle"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a man speaks while a motor runs"], "question": "which entity is stationary", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "wind blowing followed by a zoom"], "sample_ids": ["ukg5L09Wpvo", "vr8ZXjEBhMQ"], "start_seconds": ["150", "150"], "properties": ["clickety-clack, train, whistle", "wind, blow, zoom"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "wind blows and a chainsaw cuts through wood 
"], "question": "which entity is a zoom", "label": 1}, {"captions": ["water gurgles, metal squeaks and the water stops", "people speak as gunfire rings out"], "sample_ids": ["x4a9YGIw4ok", "wqTCwqVRDlk"], "start_seconds": ["120", "80"], "properties": ["water, gurgles, stops", "gunfire, ring, speak"], "captions_pred_video": ["footage is blurry and out of focus", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a toilet flushes and water splashes", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a woman talks while a baby cries and a man whispers", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["smDKStoHBJo", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["a, talk, baby, cry", "engine, idle, woman"], "captions_pred_video": ["a man holding a crying baby in his arms", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman talking?", "label": 0}, {"captions": ["birds chirp and a dog breathes heavily", "water flows and trickles"], "sample_ids": ["y2ZBGpgbhHM", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["dog, chirp, breathe", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["birds chirping and a dog panting", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "a stream of water runs briefly"], "sample_ids": ["sOa7g-44Dag", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["audio, scratching, man", "stream, water, run"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an 
attic", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "small dogs yip and bark sharply"], "sample_ids": ["wTideSjRFS0", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["food, sizzle, woman", "bark, yip, sharply"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a toilet flushes and a female speaks"], "sample_ids": ["uPDn2BFTHk", "yaln9y8I7ms"], "start_seconds": ["140", "230"], "properties": ["lady, laugh, baby", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "a duck quacks continuously"], "sample_ids": ["wsHBIgzs9Fs", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["horn, continuous, buzzing", "quacks, continuously, duck"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["an airplane accelerates briefly", "two men and a woman talk while wind blows and birds tweet"], 
"sample_ids": ["zjTG0gaGCUI", "wqZ135Ssz0"], "start_seconds": ["80", "60"], "properties": ["accelerates, airplane, briefly", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a jet engine roars as wind blows ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a moving object", "label": 0}, {"captions": ["a vehicle accelerates squealing tires", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sd7xVssqlw", "yajyRTUQk3U"], "start_seconds": ["50", "400"], "properties": ["accelerates, tires, squealing", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vYkA3cfXp5Q", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["engine, accelerate, idle", "men, talk, cars"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["an engine is idling", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is a video of a vehicle 
engine accelerating then running on idle?", "label": 0}, {"captions": ["a frog vocalizes as birds chirp", "three men talk while wind blows and some liquid flows"], "sample_ids": ["wqUmIEzuNz4", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["frog, bird, vocalize", "three men, wind, flow"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a cat meows and rustles", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a frog?", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a man speaks on a radio as wind blows"], "sample_ids": ["y2ZBGpgbhHM", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["birds, tweet, pant", "man, radio, blows"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking on a radio as wind blows?", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "some men converse over an engine running"], "sample_ids": ["vSeGhaZt-aI", "sCiy7QS1U"], "start_seconds": ["50", "300"], "properties": ["water, bubbles, speak", "men, converse, engine"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a child babbles as a woman speaks", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["wEBlkGWVWwE", "wDVMhEdTiVw"], "start_seconds": ["260", "30"], "properties": ["a, babble, woman", "gun, shoot, water"], "captions_pred_video": ["shows a person 
writing on the whiteboard", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["zcDwZ6W7E3E", "zj2R0XoFr5k"], "start_seconds": ["180", "50"], "properties": ["man, speak, motorcycles", "airplane, boy, fly"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a child speaks in closed space"], "sample_ids": ["tDlysoZiA1I", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["animal, grunts, chirps", "child, space, speak"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["w0xsN8X18Y", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["rain, thunder, surface", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], 
"captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "water flows and trickles"], "sample_ids": ["y4tPJXBKDig", "tB7hWb9gTuQ"], "start_seconds": ["20", "30"], "properties": ["a, noise, talk", "water, flow, trickle"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "sucking and grunting followed by slurping with birds in the background"], "sample_ids": ["sEprKHm8Sj8", "yYEVLuqEytU"], "start_seconds": ["90", "40"], "properties": ["car, tires, slows", "grunt, slurp, background"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "a baby goat is being petted by a person's hand"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "several sheep bleat and a man speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a duck quacks loudly and continuously"], "sample_ids": ["zl9Dqx-j7q4", "vh30P49Po6s"], "start_seconds": ["6", "30"], "properties": ["motors rev, laugh, loudly", "loud, continuous, quacks"], "captions_pred_video": ["footage of a man driving a car in the dark", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a jet engine roars ", "a duck is 
quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["s4Uz1Ffgo04", "yDoT73BWsdA"], "start_seconds": ["100", "10"], "properties": ["roars, background, people speaking", "engine, revs, vehicle"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a race car accelerates and revs its engine "], "question": "which vehicle is revving its engine", "label": 1}, {"captions": ["a motorcycle engine works nearby", "small dogs growl, bark and yip."], "sample_ids": ["tOSWIURC-4", "sShpyu2l4YQ"], "start_seconds": ["0", "0"], "properties": ["engine, work, nearby", "growl, bark, yip"], "captions_pred_video": [null, "the puppies are playing with a toy"], "captions_pred_audio": ["a lawn mower is running ", "a dog is barking and growling"], "question": "which entity is more active", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["sQGXqGcwOTc", "rqu8iB22IY"], "start_seconds": ["3", "5"], "properties": ["cling, speak, dishes", "sound, repeats, laugh"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", null], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a dog barks and a man speaks while music plays "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a man speaks as a machine runs", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vD6lYD1l0BY", "vbZ-0lGPneg"], "start_seconds": ["330", "30"], "properties": ["a, machine, run", "a woman, a television program, a bird"], "captions_pred_video": ["game 
controller being held in the hands of the person", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "people speak in a closed space"], "sample_ids": ["vYkA3cfXp5Q", "sTpirNYo8vQ"], "start_seconds": ["30", "30"], "properties": ["speed, idle, accelerate", "people, space, speak"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "of a man taking a selfie on a bus"], "captions_pred_audio": ["an engine is idling", "a man is speaking while a car is revving and accelerating "], "question": "which entity is stationary", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a child speaks in closed space"], "sample_ids": ["un9VQlzgZM", "yW6FWLSLkx4"], "start_seconds": ["5", "40"], "properties": ["wind, speak, laugh", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "waves crash against a shoreline and people speak"], "sample_ids": ["s59PfAghdkM", "yFB25fqfU8I"], "start_seconds": ["0", "300"], "properties": ["bird, chirp, background, horse, neigh", "wave, crash, shoreline"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "footage of a person surfing in the ocean"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "a man is speaking and laughing while water is 
splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["loud, continuous burping", "a toilet flushes and a female speaks"], "sample_ids": ["y636gklDioE", "yaln9y8I7ms"], "start_seconds": ["20", "230"], "properties": ["loud, continuous, burping", "female, flushes, toilet"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "footage is blurry and out of focus"], "captions_pred_audio": ["a person burps loudly several times", "a toilet flushes and a man speaks"], "question": "which entity is silent", "label": 1}, {"captions": ["water rushes by followed by a motorcycle zooming by in the distance", "some tunes played by whistling"], "sample_ids": ["s4Uz1Ffgo04", "u6BnG6YZqJ4"], "start_seconds": ["100", "0"], "properties": ["water, rushes, motorcycle", "tune, play, whistling"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tqR406bGiE", "vYkA3cfXp5Q"], "start_seconds": ["40", "30"], "properties": ["flush, water, gurgle", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a toilet is flushed", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "water is sprayed across a hard surface"], "sample_ids": ["vBHyYJ8pL0", "sQwlkXjQabo"], "start_seconds": ["2", "10"], "properties": ["noise, door, opening", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red 
car with water droplets on the hood"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "an infant crying as a woman laughs"], "sample_ids": ["xjvTpk2Zpr8", "xhmRY9yhC7c"], "start_seconds": ["70", "20"], "properties": ["engine, run, wind", "a, laugh, infant"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "paper is crumpling consistently"], "sample_ids": ["v0x1odnXtP0", "v5cSxLaHADY"], "start_seconds": ["210", "0"], "properties": ["keyboard, type, computer", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a person is typing on a keyboard", "paper is crumpled and crinkled"], "question": "which object is crumpling", "label": 0}, {"captions": ["a few ducks quack and scamper and a man speaks", "a car speeding up in the distance"], "sample_ids": ["w2bYrCVLT60", "u0TrcHhkPQ"], "start_seconds": ["120", "20"], "properties": ["ducks, speak, quack", "distance, car, speed"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", null], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "water splashes as an animal walks through"], "sample_ids": ["x5cuQjOdM3E", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["cat, 
talk, meow", "animal, water, splashes"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a cat meows and a woman speaks", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vs65y4qmyBE", "vb1fPSDI4c"], "start_seconds": ["340", "30"], "properties": ["engine, run, man", "multiple, people, yell"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a person whistles a meandering tune", "a duck quacks continuously"], "sample_ids": ["uFoga8sHpiw", "vh30P49Po6s"], "start_seconds": ["90", "30"], "properties": ["person, tune, whistle", "quacks, continuously, duck"], "captions_pred_video": ["footage of a bird in a cage", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person whistles a song", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 1}, {"captions": ["a consistent ticking pattern", "insects humming with a dog barking and small goat bleating"], "sample_ids": ["sCeWURVHfOM", "tIY7qOV3rEM"], "start_seconds": ["30", "0"], "properties": ["ticking, pattern, clock", "animal, bark, dog, barking, small, goat, bleating"], "captions_pred_video": ["- a close-up view of the clock's inner workings", "a dog is standing in the middle of a dirt road in the woods"], "captions_pred_audio": ["ticking of a clock", "a dog is barking and a cat is meowing"], "question": "which animal is barking", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "the 
clinking of a train bell with the humming of an engine and a train horn blowing"], "sample_ids": ["zFjIWfSD-4", "zgUgkpk78xU"], "start_seconds": ["410", "70"], "properties": ["People, motor, brakes", "clinking, humming, horn"], "captions_pred_video": [null, "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a train blows its horn as it speeds down the tracks "], "question": "which entity is a train?", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a man speaks as a car is passing by"], "sample_ids": ["sShpyu2l4YQ", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["growl, bark, yip", "a, car, pass"], "captions_pred_video": ["the puppies are playing with a toy", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more passive", "label": 1}, {"captions": ["a male speaks and another male speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["viuTg1M-dqg", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["two males, speaking, male", "stream, water, flow"], "captions_pred_video": ["footage of water coming out of a hole in the 
ground", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["an airplane engine spools and people speak", "a person is burping then speaks and laughs"], "sample_ids": ["wTjoRj1se3U", "wAAkbZToh8"], "start_seconds": ["390", "0"], "properties": ["airplane, engine, spool", "burp, laugh, speak"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", null], "captions_pred_audio": ["a jet engine is running and people are talking", "a man burps and a woman speaks"], "question": "which entity is a person?", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "food is frying while a woman speaks"], "sample_ids": ["y4tPJXBKDig", "yhQ2Lg-7qDY"], "start_seconds": ["20", "130"], "properties": ["a, noise, talk", "food, woman, speak"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "a pan filled with meat and sauce being cooked on a stove top"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a faucet is running and a man is speaking"], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "people applaud and hoot and chat quietly"], "sample_ids": ["yDoT73BWsdA", "wwyfGO2J4"], "start_seconds": ["10", "90"], "properties": ["engine revs, tires squeal, vehicle", "people, applaud, hoot"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "a vehicle engine accelerating then running on idle"], "sample_ids": 
["yDoT73BWsdA", "vYkA3cfXp5Q"], "start_seconds": ["10", "30"], "properties": ["engine revs, tires squeal, vehicle", "engine, accelerate, idle"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["multiple people speak and children yell while water gurgles", "birds chirp and an owl hoots before a man speaks briefly"], "sample_ids": ["vb1fPSDI4c", "wRBHTgrbiwg"], "start_seconds": ["30", "50"], "properties": ["multiple, people, yell", "bird, owl, speak"], "captions_pred_video": [null, "of a bee pollinating the flowers in the field"], "captions_pred_audio": ["a crowd of people are talking and laughing", "birds are chirping and insects are buzzing"], "question": "which entity has more animals speaking", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["shmR4OZtzqA", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["man, engine, idle", "music, gunfire, explosion"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "in your own 
words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man speaks while a motor runs", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vzxHnu-SFEw", "vJ7JPEFhyLA"], "start_seconds": ["80", "16"], "properties": ["two objects, woman, speak", "three men, wind, flow"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a man speaks as bees buzz and birds chirp", "vehicles pass by on a roadway"], "sample_ids": ["t25U-v4k4ts", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["bees buzz, birds chirp, man speaks", "pass, vehicle, roadway"], "captions_pred_video": ["of a beekeeper working on a 
beehive in the woods", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a car is driving on the road "], "question": "which entity is more active", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "some men converse over an engine running"], "sample_ids": ["y8dSeubCNI", "sCiy7QS1U"], "start_seconds": ["4", "300"], "properties": ["engine revving, people speaking, motorcycle", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine revving and people talking in the background", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a motorcycle?", "label": 0}, {"captions": ["plastic is tapped on while someone speaks", "a toilet flushes and a female speaks"], "sample_ids": ["wvKpEYswXO0", "yaln9y8I7ms"], "start_seconds": ["150", "230"], "properties": ["plastic, tap, speak", "female, flushes, toilet"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["a wooden clack accompanies nearby chirping birds", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["yeFvk9x0wWI", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["clack, bird, chirp", "two men, woman, birds"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", null], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more birds", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "a stream of water flows as people talk and wind 
blows"], "sample_ids": ["uYT5gxnyMWM", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["female, spraying, scream", "stream, water, flow"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a child speaks in closed space"], "sample_ids": ["v0x1odnXtP0", "yW6FWLSLkx4"], "start_seconds": ["210", "40"], "properties": ["keyboard, type, computer", "child, space, speak"], "captions_pred_video": ["how to make money on youtube in spanish", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "a helicopter engine runs continuously"], "sample_ids": ["vlS6YMeWAPo", "ugHJF0hfYkg"], "start_seconds": ["40", "10"], "properties": ["noise, bleat, call", "engine, running, continuously"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a goat bleats and birds chirp", "a helicopter is flying overhead "], "question": "which entity is not a noise", "label": 1}, {"captions": ["an engine starts and increases in power", "an airplane engine spools and people speak"], "sample_ids": ["zjTG0gaGCUI", "wTjoRj1se3U"], "start_seconds": ["80", "390"], "properties": ["power, increase, engine", "airplane, engine, spool"], "captions_pred_video": [null, "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a jet engine is 
running and people are talking"], "question": "which entity is a moving object", "label": 1}, {"captions": ["children cry and people talk", "a car speeding up in the distance"], "sample_ids": ["xLwHe825Zs", "u0TrcHhkPQ"], "start_seconds": ["18", "20"], "properties": ["people talk, children cry, people talk", "distance, car, speed"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby cries and a woman speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a child speaks", "a toilet door squeaks as it is opened"], "sample_ids": ["yW6FWLSLkx4", "sdXV-ylviw"], "start_seconds": ["40", "190"], "properties": ["a, child, speaks", "door, toilet, squeaks"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", null], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a dog barks and taps with background noise "], "question": "which entity is quieter", "label": 1}, {"captions": ["a stream of water flows quickly", "an engine runs loudly"], "sample_ids": ["wbHTKEJZyhc", "vqZuVbG6-HI"], "start_seconds": ["20", "130"], "properties": ["stream, water, flow", "loud, engine, run"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a 
river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a lawn mower is running and men are speaking "], "question": "which entity is quieter", "label": 0}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "dishes cling together then a man begins to speak"], "sample_ids": ["zofjfKhqLk8", "sQGXqGcwOTc"], "start_seconds": ["10", "3"], "properties": ["background, metal, clank", "cling, speak, dishes"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "mechanisms are operating and water is splashing "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["material crumbles into a microphone", "an insect buzzes around continuously"], "sample_ids": ["vofpvUo6NAw", "v25l1jef3JY"], "start_seconds": ["220", "0"], "properties": ["material, crumbles, microphone", "buzzes, continuously, insect"], "captions_pred_video": ["person wrapping a toy car in a plastic bag", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["paper is being crumpled and crinkled", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a car speeding up in the distance"], "sample_ids": ["sfAvvZwdLCY", "u0TrcHhkPQ"], "start_seconds": 
["20", "20"], "properties": ["flushes, drains, water", "distance, car, speed"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["wTideSjRFS0", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["food, sizzle, woman", "a, scream, girl"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a woman is speaking and a baby is crying"], "question": "which entity is about a girl speaking?", "label": 1}, {"captions": ["someone whistles a tune", "a duck quacks continuously"], "sample_ids": ["sIXTftIuUgw", "vh30P49Po6s"], "start_seconds": ["90", "30"], "properties": ["someone, tune, whistle", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person whistling a song", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["xvDdE3zNf8Y", "uZesmtKZGSw"], "start_seconds": ["120", "250"], "properties": ["a, female, speaks", "men, talk, cars"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro 
gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a stream runs then someone speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["wbHTKEJZyhc", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["stream, run, someone", "people, applaud, hoot"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", null], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "people are clapping and speaking with background noise "], "question": "which entity 
is more active", "label": 1}, {"captions": ["a clock ticktocks briefly", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["u7C-AEBQM", "wqZ135Ssz0"], "start_seconds": ["30", "60"], "properties": ["ticktocks, clock, ticktocks briefly", "two men, woman, birds"], "captions_pred_video": [null, null], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a clock?", "label": 0}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "people applaud and hoot and chat quietly"], "sample_ids": ["zgUgkpk78xU", "wwyfGO2J4"], "start_seconds": ["70", "90"], "properties": ["clinking, humming, horn", "people, applaud, hoot"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "a cat meows as a young woman speaks"], "sample_ids": ["y2ZBGpgbhHM", "x5cuQjOdM3E"], "start_seconds": ["30", "30"], "properties": ["animal, growl, bird", "cat, meows, young woman"], "captions_pred_video": [null, "a black background with an airplane flying in the sky"], "captions_pred_audio": ["birds chirping and a dog panting", "a cat meows and a woman speaks"], "question": "which entity is more likely to be domesticated", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "small dogs yip and bark sharply"], "sample_ids": ["yswmmRZFItk", "v-wcQf4BDY0"], 
"start_seconds": ["0", "120"], "properties": ["background, frog, croak", "bark, yip, sharply"], "captions_pred_video": ["a close up of a frog in the water", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a frog is croaking", "a dog barks and growls"], "question": "which entity is more likely to be a frog", "label": 0}, {"captions": ["animals bleat and moo as a person speaks", "men speak and a nozzle sprays liquid"], "sample_ids": ["tPJvjq9QePY", "wRV8yMk886E"], "start_seconds": ["40", "0"], "properties": ["animal, bleat, moo", "liquid, spray, nozzle"], "captions_pred_video": ["a dog and a sheep in a barn", "two cars are parked in a parking lot at night"], "captions_pred_audio": ["a baby cries and a man speaks", "a man speaks followed by a loud burst"], "question": "which entity is more likely to be used in a science class", "label": 1}, {"captions": ["a woman sneezes then speaks", "a heavy rain falls endlessly"], "sample_ids": ["x4dZyf9Gbj0", "wP8ZKrlx3oA"], "start_seconds": ["130", "40"], "properties": ["sneezes, speaks, woman", "heavy, rain, fall"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a woman sneezes and speaks", "a heavy rain is falling on a surface"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "wind blows as people chatter quietly"], "sample_ids": ["uYT5gxnyMWM", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["person, spray, yell", "wind, chatter, people"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, 
{"captions": ["a vehicle engine accelerating then running on idle", "a man speaks followed by another man speaking outside"], "sample_ids": ["vYkA3cfXp5Q", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["engine, accelerate, idle", "two men, speak, follow"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["an engine is idling", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of a vehicle engine accelerating then running on idle?", "label": 0}, {"captions": ["the wind blows while a vehicle engine runs", "water is sprayed across a hard surface"], "sample_ids": ["xyL9F5VrjkE", "sQwlkXjQabo"], "start_seconds": ["20", "10"], "properties": ["wind, blows, vehicle", "water, spray, surface"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sYITalLZjj4", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["water, rushes, background, birds", "music, gunfire, explosion"], "captions_pred_video": ["two ducks are swimming in the water near each other", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["wind blows and birds chirp", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sQGXqGcwOTc", "vJ7JPEFhyLA"], "start_seconds": ["3", "16"], 
"properties": ["cling, speak, dishes", "three men, wind, flow"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about liquid flowing?", "label": 1}, {"captions": ["a machine beeps continuously", "a dog barks and whimpers"], "sample_ids": ["y682ml90jGw", "sShpyu2l4YQ"], "start_seconds": ["11", "0"], "properties": ["beeps, machine, continuously", "barks, whimpers, dog"], "captions_pred_video": [null, "the puppies are playing with a toy"], "captions_pred_audio": ["a beeping sound is being made ", "a dog is barking and growling"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "a child speaks in closed space"], "sample_ids": ["u7C-AEBQM", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["ticks, rhythmic, quiet", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a ticktock of a clock", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "some men converse over an engine running"], "sample_ids": ["w34HjHr6gAY", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["beeps, squawk, child speaking", "men, converse, engine"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of 
oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a video of a child speaking?", "label": 0}, {"captions": ["people cheer as a vehicle engine revs", "a child speaks in closed space"], "sample_ids": ["xjhAnI2q6hM", "yW6FWLSLkx4"], "start_seconds": ["6", "40"], "properties": ["engine revs, vehicle, people", "child, space, speak"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "wind blows as people chatter quietly"], "sample_ids": ["wwyfGO2J4", "xBxDz0CFVn0"], "start_seconds": ["90", "30"], "properties": ["people, applaud, hoot", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tOj4tdLRaA", "uZesmtKZGSw"], "start_seconds": ["70", "250"], "properties": ["woman, laugh, baby", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a crowd yells, reacts and applauds", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["wztCSUxOf8", "y8WEcpOlT3I"], "start_seconds": ["130", "40"], "properties": ["a crowd, yells, applauds", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a man is speaking with wind noise in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["water flows followed by women screaming", "water runs from a faucet while some men speak and the water runs in the sink"], "sample_ids": ["w5W5Kqtc8E", "vzceMbklWc"], "start_seconds": ["100", "180"], "properties": ["water, flow, women", "water, faucet, sink"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "water is running and a man is speaking"], "question": "which entity shows water flowing?", "label": 0}, {"captions": ["wind blows and people scream while an engine revs", "a duck quacks continuously"], "sample_ids": ["w5W5Kqtc8E", "vh30P49Po6s"], "start_seconds": ["100", "30"], "properties": ["wind, engine, scream", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a duck is quacking loudly"], "question": 
"which entity is quieter", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a man speaks while rain falls onto a hard surface"], "sample_ids": ["w5W5Kqtc8E", "wqN6IIHw3po"], "start_seconds": ["100", "30"], "properties": ["water, splashes, motorboat", "rain, surface, fall"], "captions_pred_video": [null, "in your own words what is happening in this screenshot? blood splattered all over the place"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking and water is splashing"], "question": "which entity is a video of rain falling?", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "three men talk while wind blows and some liquid flows"], "sample_ids": ["smGI3C1NZc", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["water, drain, toilet", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing?", "label": 0}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a male speaks and another male speaks"], "sample_ids": ["sLUnaPT5gM8", "viuTg1M-dqg"], "start_seconds": ["0", "30"], "properties": ["loud, laughter, intermittent", "two males, speaking, male"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "a pigeon cooing as an insect buzzes by briefly"], "sample_ids": ["zj2R0XoFr5k", "yZrFNS7GFBQ"], "start_seconds": ["50", "30"], "properties": ["airplane, 
fly, woman", "pigeon, buzzes, insect"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "of the bird in the cage"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "an owl hoots in the background "], "question": "which entity is a bird?", "label": 1}, {"captions": ["birds chirp and objects are moved around", "water pouring and bubbling"], "sample_ids": ["yPUYU6t3rwo", "uyRfq-jKPpo"], "start_seconds": ["370", "50"], "properties": ["birds chirp, objects are moved around, birds", "water, bubbles, pouring"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["insects buzz and a man speaks", "water is running from a faucet"], "question": "which entity is more likely to be in a bathroom", "label": 1}, {"captions": ["a infant makes noise and is excited", "an infant crying frantically"], "sample_ids": ["wIJK3-5y0kA", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["noise, excited, infant", "cry, infant, frantically"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "of the baby crying in the car seat"], "captions_pred_audio": ["a baby cries and a woman speaks", 
"a baby cries loudly"], "question": "which infant is crying frantically", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "waves crash against a shoreline and people speak"], "sample_ids": ["yeFvk9x0wWI", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["chirp, twitter, clatter", "wave, crash, shoreline"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "footage of a person surfing in the ocean"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["smGI3C1NZc", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["water, drain, toilet", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "pigeons vocalize and birds chirp"], "sample_ids": ["xfudFO976zE", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["animal, bleats, cry", "vocalize, bird, chirp"], "captions_pred_video": ["footage is blurry and shaky", "of the pigeon in the cage"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a man is speaking and a bee is buzzing"], "question": "which animal is vocalizing", "label": 1}, {"captions": ["a dog barks and whimpers", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sShpyu2l4YQ", "sLUnaPT5gM8"], "start_seconds": ["0", "0"], "properties": ["barks, whimpers, dog", "loud, laughter, intermittent"], 
"captions_pred_video": ["the puppies are playing with a toy", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a dog is barking and growling", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an aircraft engine runs as people speak", "distant men speak as a spray can nozzle is depressed"], "sample_ids": ["wTjoRj1se3U", "rwtmaKiCcQU"], "start_seconds": ["390", "30"], "properties": ["engine, run, people", "nozzle, depressed, spray can"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "shows a man spraying paint on a wall with a spray gun"], "captions_pred_audio": ["a jet engine is running and people are talking", "spraying and people speaking"], "question": "which entity is a spray can?", "label": 1}, {"captions": ["a propeller rotates loudly and intensely", "a vehicle engine accelerating then running on idle"], "sample_ids": ["ugHJF0hfYkg", "vYkA3cfXp5Q"], "start_seconds": ["10", "30"], "properties": ["loud, intense, propeller", "engine, accelerate, idle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a helicopter is flying overhead ", "an engine is idling"], "question": "which is a vehicle", "label": 1}, {"captions": ["a person speaks briefly", "several insects fly while two men talk"], "sample_ids": ["zOZleIRqZm4", "s-T9OVOiMLo"], "start_seconds": ["80", "330"], "properties": ["person, talk, brief", "several, fly, men"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more likely to be a video", 
"label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sLUnaPT5gM8", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["loud, laughter, intermittent", "a woman, something, fried"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a woman is speaking while food is frying in the background"], "question": "which entity is a video", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "winds blows roughly as a vehicle races past"], "sample_ids": ["w8uLijTqtlU", "xjvTpk2Zpr8"], "start_seconds": ["70", "70"], "properties": ["wind, microphone, noise", "wind, blows, vehicle"], "captions_pred_video": ["footage is blurry and shaky", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["the wind is blowing strongly", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["an audience gives applause as a man yells and a group sings", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["tdWhHV3X25Q", "ukg5L09Wpvo"], "start_seconds": ["60", "150"], "properties": ["applause, audience, yells", "clickety-clack, train, whistle"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a train blows its whistle and blows its horn "], "question": "which entity is quieter", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a train whistle keeps going off while the 
clickety-clack of the train on the rails are continuous"], "sample_ids": ["yYEVLuqEytU", "ukg5L09Wpvo"], "start_seconds": ["40", "150"], "properties": ["animal, pig, background", "clickety-clack, train, whistle"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a train horn sounds and railroad crossing ring", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["s7knHCFW82w", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["horn, sound, train", "engine, idle, woman"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "a clock ticktocks"], "sample_ids": ["vs65y4qmyBE", "v-g-j2uTByM"], "start_seconds": ["340", "30"], "properties": ["wind, blows, strongly", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "vehicles pass by on a roadway"], "sample_ids": ["vMf1dLD6Sng", "tgbONvsP47Y"], "start_seconds": ["6", "0"], "properties": ["frog, bird, vocalize", "pass, vehicle, roadway"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", "footage 
of a fire truck entering a garage"], "captions_pred_audio": ["a frog croaks loudly", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "paper folding and crinkling"], "sample_ids": ["wudZTNBtVqc", "zPpG3RD8lSs"], "start_seconds": ["60", "20"], "properties": ["accelerates, engine, wind", "paper, fold, crinkle"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a car accelerates and revs its engine ", "the wind blows and a mouse clicks "], "question": "which entity is stationary", "label": 1}, {"captions": ["a goat screams and people speak in the background", "a man speaks as a car is passing by"], "sample_ids": ["xC8kbrKJmco", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["background, goat, scream", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai 
accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a goat is bleating ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "waves crash against a shoreline and people speak"], "sample_ids": ["yDoT73BWsdA", "yFB25fqfU8I"], "start_seconds": ["10", "300"], "properties": ["engine revs, tires squeal, vehicle", "wave, crash, shoreline"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["someone snores nearby", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["spJCm8tD9Zo", "vbZ-0lGPneg"], "start_seconds": ["90", "30"], "properties": ["someone snores, nearby, someone", "a woman, a television program, a bird"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "vehicles pass by on a roadway"], "sample_ids": ["wvKpEYswXO0", "tgbONvsP47Y"], "start_seconds": ["150", "0"], "properties": ["plastic, tap, speak", "pass, vehicle, roadway"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a car 
is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["xM4joTqDVp4", "vlS6YMeWAPo"], "start_seconds": ["160", "40"], "properties": ["background, chirp, birds", "sheep, baa, birds"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a goat bleats and birds chirp"], "question": "which entity has more birds", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["y2ZBGpgbhHM", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["birds, tweet, pant", "clickety-clack, train, whistle"], "captions_pred_video": [null, "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["birds chirping and a dog panting", "a train blows its whistle and blows its horn "], "question": "which entity is a train", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "an engine runs loudly"], "sample_ids": ["vms5XGTDVQc", "vqZuVbG6-HI"], "start_seconds": ["220", "130"], "properties": ["paper, crumpled, crinkled", "loud, engine, run"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "footage is blurry because it's raining outside"], "captions_pred_audio": ["paper is crumpled and crinkled", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "a motor vehicle roars, drowning out people speaking in the background"], "sample_ids": ["u21-Z5gJCB8", 
"s4Uz1Ffgo04"], "start_seconds": ["30", "100"], "properties": ["background, voice, man", "roars, background, people speaking"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yLy-WycbVVE", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["background, people, talk", "female, spraying, scream"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "pigeons vocalize and birds chirp"], "sample_ids": ["u21-Z5gJCB8", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["background, voice, man", "vocalize, bird, chirp"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a stream of water runs briefly"], "sample_ids": ["yYEVLuqEytU", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["animal, pig, background", "stream, water, run"], "captions_pred_video": ["a 
baby goat is being petted by a person's hand", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a helicopter engine runs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["t5ZbXbniOWk", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["engine, helicopter, run", "water, radio, man"], "captions_pred_video": ["the image is loading a screenshot of the game flight simulator 2004 on nintendo gamecube", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a helicopter?", "label": 1}, {"captions": ["an insect buzzes around continuously", "some men converse over an engine running"], "sample_ids": ["v25l1jef3JY", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["buzzes, continuously, insect", "men, converse, engine"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is not a person?", "label": 0}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["w2JXXIAdUdg", "w34HjHr6gAY"], "start_seconds": ["10", "30"], "properties": ["snoring, distance, person", "beeps, hit, woman"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the 
wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "some tunes played by whistling"], "sample_ids": ["se87d6yxEOA", "u6BnG6YZqJ4"], "start_seconds": ["10", "0"], "properties": ["run, whistle, pass", "tune, play, whistling"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vddP56-ogds", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["water, splash, person, laugh", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a consistent ticking pattern", "birds chirp and objects are moved around"], "sample_ids": ["sCeWURVHfOM", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["ticking, pattern, clock", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["- a close-up view of the clock's inner workings", "footage and stock-footage/b-roll of a beekeeper 
opening a beehive"], "captions_pred_audio": ["ticking of a clock", "insects buzz and a man speaks"], "question": "which entity is more like a clock", "label": 0}, {"captions": ["someone sprays liquid onto a hard surface", "an insect buzzes around continuously"], "sample_ids": ["sQwlkXjQabo", "v25l1jef3JY"], "start_seconds": ["10", "0"], "properties": ["liquid, surface, spray", "buzzes, continuously, insect"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["spraying followed by silence", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a stream of water runs briefly", "people speak as gunfire rings out"], "sample_ids": ["x-PeY8Yb8M4", "wqTCwqVRDlk"], "start_seconds": ["300", "80"], "properties": ["stream, water, run", "gunfire, ring, speak"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a car is driving on a wet road ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xjhAnI2q6hM", "zl9Dqx-j7q4"], "start_seconds": ["6", "6"], "properties": ["engine revs, vehicle, people", "engine, laugh, loud"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a jet engine roars "], "question": "which entity is about a vehicle engine?", "label": 0}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["xKB8O8LTs6s", 
"wqZ135Ssz0"], "start_seconds": ["70", "60"], "properties": ["music, radio, gunshots", "two men, woman, birds"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "paper is crumpling consistently"], "sample_ids": ["vcmWSmvti8", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["music, man, fire", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "paper is crumpled and crinkled"], "question": "which entity is not a video of a man speaking as music plays before artillery is fired?", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["sxYkFKFIZD0", "yDoT73BWsdA"], "start_seconds": ["20", "10"], "properties": ["screech, man, door", "engine, revs, vehicle"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a race car accelerates and revs its engine "], "question": "which vehicle is moving", "label": 1}, {"captions": ["birds chirp and objects are moved 
around", "vehicles pass by on a roadway"], "sample_ids": ["yPUYU6t3rwo", "tgbONvsP47Y"], "start_seconds": ["370", "0"], "properties": ["birds chirp, objects are moved around, birds", "pass, vehicle, roadway"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "footage of a fire truck entering a garage"], "captions_pred_audio": ["insects buzz and a man speaks", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "a man speaks as a car is passing by"], "sample_ids": ["vcmWSmvti8", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["music, man, fire", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a man speaking as a car passes by?", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "dishes cling together then a man begins to speak"], "sample_ids": ["ziUT9IFTkjg", "sQGXqGcwOTc"], "start_seconds": ["10", "3"], "properties": ["background, birds, rustling", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, 
{"captions": ["a girl talking, laughing and sneezing noise", "a man speaks as a vehicles passes by then a woman speaks"], "sample_ids": ["y4tPJXBKDig", "siJFXfGWgDk"], "start_seconds": ["20", "50"], "properties": ["a, noise, talk", "man, woman, vehicle"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a man is speaking and birds are chirping in the background "], "question": "which entity has a man and a woman speaking?", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "people speak as gunfire rings out"], "sample_ids": ["xV7Mg1QucSc", "wqTCwqVRDlk"], "start_seconds": ["14", "80"], "properties": ["alarm, ticktocks, laughs", "gunfire, ring, speak"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["ul60S8TXDA8", "tiDFTC-5vU"], "start_seconds": ["60", "30"], "properties": ["sound, distance, bell", "male, duck, laugh"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", null], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["a man speaks as a car is passing by", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sK4u5T8hW78", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["a, car, pass", "music, gunfire, explosion"], 
"captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "winds blows roughly as a vehicle races past"], "sample_ids": ["yYEVLuqEytU", "xjvTpk2Zpr8"], "start_seconds": ["40", "70"], "properties": ["grunt, slurp, background", "wind, blows, vehicle"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["a baby laugh at a sputter", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["sLUnaPT5gM8", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["laugh, sputter, baby", "animal, grunts, snorts"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "an airplane engine spools and people speak"], "sample_ids": ["slZLHwNbbt4", 
"wTjoRj1se3U"], "start_seconds": ["300", "390"], "properties": ["a, horn, run", "airplane, engine, spool"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a jet engine is running and people are talking"], "question": "which entity is a machine", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a man speaks as a motor runs in the background"], "sample_ids": ["y2ZBGpgbhHM", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["birds, tweet, pant", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["birds chirping and a dog panting", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "a toilet flushes and a female speaks"], "sample_ids": ["vYkA3cfXp5Q", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["speed, idle, accelerate", "female, flushes, toilet"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage is blurry and out of focus"], "captions_pred_audio": ["an engine is idling", "a toilet flushes and a man speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "water pouring and bubbling"], "sample_ids": ["u--KhUW8l1Y", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["engine, sound, horn", "water, bubbles, pouring"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube 
how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["ukg5L09Wpvo", "w5W5Kqtc8E"], "start_seconds": ["150", "100"], "properties": ["a train, a horn, a bell", "wind, blow, vehicle"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", null], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["yajyRTUQk3U", "sapQIQUhFc"], "start_seconds": ["400", "280"], "properties": ["a woman, something, fried", "liquid, flow, distance"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking and a stream is flowing in the background "], "question": "which entity is about a woman talking?", "label": 0}, {"captions": ["a person whistles and clicks a mouse", "continuous snoring"], 
"sample_ids": ["zCrAfDfv6-A", "sLkeqCDJIyw"], "start_seconds": ["30", "120"], "properties": ["person, mouse, click", "loud, snoring, noise"], "captions_pred_video": ["shows a man with glasses and a green shirt sitting on a couch in a living room", ", what is the man doing on the couch? sleeping"], "captions_pred_audio": ["a person whistles a song", "a person is snoring loudly"], "question": "which noise is louder", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a car accelerates and wind blows"], "sample_ids": ["ul60S8TXDA8", "u0TrcHhkPQ"], "start_seconds": ["60", "20"], "properties": ["sound, distance, bell", "accelerates, wind, blows"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", null], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "water rushes and then a vehicle zooms past"], "sample_ids": ["vzxHnu-SFEw", "s4Uz1Ffgo04"], "start_seconds": ["80", "100"], "properties": ["two objects, woman, speak", "water, rushes, vehicle"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is more active", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zofjfKhqLk8", "vb1fPSDI4c"], "start_seconds": ["10", "30"], "properties": ["background, metal, clank", "multiple, people, yell"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a crowd of people are talking and laughing"], "question": "which entity has a lot of noise", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "pigeons vocalize and birds chirp"], "sample_ids": ["smGI3C1NZc", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["water, drain, toilet", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "some light rustling followed by a loud burp and a girl speaking"], "sample_ids": ["wy1eKjR7KC0", "vdoxuJn9lTc"], "start_seconds": ["30", "40"], "properties": ["people, talk, distance", "burp, loud, girl"], "captions_pred_video": ["two police officers riding motorcycles down the street", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a child 
speaks followed by a burp"], "question": "which entity is quieter", "label": 0}, {"captions": ["a drill drills through something then people begin laughing", "water running down a sink while a man is talking"], "sample_ids": ["tEE3MpBt1sg", "vSeGhaZt-aI"], "start_seconds": ["50", "50"], "properties": ["drill, something, laugh", "water, sink, talk"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a video of water running down a sink?", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["sU53zg9Jp7s", "ziUT9IFTkjg"], "start_seconds": ["380", "10"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "background, birds, rustling"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", null], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "birds are chirping and a chime is ringing "], "question": "which entity has a doorbell ringing?", "label": 0}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "pigeons vocalize and birds chirp"], "sample_ids": ["uC9dtII1KDI", "uiS58TNyUiw"], "start_seconds": ["150", "430"], "properties": ["wind, gusts, distance", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "of the pigeon in the cage"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a saw finishes running as 
metal clings in the background", "people speak as gunfire rings out"], "sample_ids": ["zofjfKhqLk8", "wqTCwqVRDlk"], "start_seconds": ["10", "80"], "properties": ["background, metal, clings", "gunfire, ring, speak"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wtDqrBygTcU", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["man, engine, run", "multiple, people, yell"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", null], "captions_pred_audio": ["a man is speaking and a motor is running", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "wind blows as people chatter quietly"], "sample_ids": ["yVumC9TGknc", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["humming, clock, birds", "wind, chatter, people"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage is blurry and out of focus"], "captions_pred_audio": ["a series of beeps and chirps", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "a stream of water flows as people talk and wind blows"], "sample_ids": ["wqN6IIHw3po", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["rain, surface, fall", "stream, water, flow"], "captions_pred_video": ["in your own words what is happening in this screenshot? 
blood splattered all over the place", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and water is splashing", "a man is speaking with wind noise in the background "], "question": "which entity is flowing", "label": 1}, {"captions": ["a stream of water flows quickly", "wind blows as people chatter quietly"], "sample_ids": ["wbHTKEJZyhc", "xBxDz0CFVn0"], "start_seconds": ["20", "30"], "properties": ["stream, water, flow", "wind, chatter, people"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "footage is blurry and out of focus"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking with wind noise in the background "], "question": "which entity is moving more slowly", "label": 1}, {"captions": ["a soft wind underscores a woman laughing", "a duck quacks continuously"], "sample_ids": ["s6DESzUTGjY", 
"vh30P49Po6s"], "start_seconds": ["16", "30"], "properties": ["wind, laugh, woman", "quacks, continuously, duck"], "captions_pred_video": ["how to set up an aquarium in 10 easy steps youtube youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aquarium in 10 easy steps youtube how to set up an aqu", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a motorboat is moving with wind noise in the background ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a man speaks as a motor runs in the background"], "sample_ids": ["w2M4i1mklOA", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["loud, chime, bell", "background, motor, run"], "captions_pred_video": ["footage of an antique clock", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is quieter", "label": 1}, {"captions": ["scraping and female speech with distant music", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["yHeVV-xeOxQ", "y2bVZ7rz-5M"], "start_seconds": ["130", "280"], "properties": ["female, speech, music", "motor noise, horn, siren"], 
"captions_pred_video": ["of a girl milking a goat's udder", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a woman is speaking and scraping sounds are heard in the background ", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn honking?", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "people applaud and hoot and chat quietly"], "sample_ids": ["zO-LSSY92ZM", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["liquid, surface, sound", "people, applaud, hoot"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", null], "captions_pred_audio": ["steam is hissing and hissing", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an engine runs and a man speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yT5WfYMRr-U", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["engine, run, man", "a woman, a television program, a bird"], 
"captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["people speak as gunfire rings out", "water flows and trickles"], "sample_ids": ["wqTCwqVRDlk", "tB7hWb9gTuQ"], "start_seconds": ["80", "30"], "properties": ["gunfire, ring, speak", "water, flow, trickle"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and a gun is fired", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an infant crying as a woman laughs", "paper is crumpling consistently"], "sample_ids": ["xhmRY9yhC7c", "v5cSxLaHADY"], "start_seconds": ["20", "0"], "properties": ["a, laugh, infant", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of a baby crying in a baby bouncer", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a baby cries and a woman speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["someone snores nearby", "someone is burping continuously"], "sample_ids": ["spJCm8tD9Zo", "y636gklDioE"], "start_seconds": ["90", "20"], "properties": ["someone snores, nearby, someone", "burps, burps, burps"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a dog sitting on a red chair in front of an old telephone"], "captions_pred_audio": ["a person is snoring loudly", "a person burps loudly several times"], "question": "which entity is more annoying", "label": 1}, {"captions": ["a person is whistling", "winds blows roughly as a vehicle races past"], "sample_ids": 
["sIXTftIuUgw", "xjvTpk2Zpr8"], "start_seconds": ["90", "70"], "properties": ["person, whistling, person", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a person whistling a song", "a jet engine roars and wind blows "], "question": "which entity is a person", "label": 0}, {"captions": ["continuous snoring", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sLkeqCDJIyw", "wDVMhEdTiVw"], "start_seconds": ["120", "30"], "properties": ["loud, snoring, noise", "gun, shoot, water"], "captions_pred_video": [", what is the man doing on the couch? sleeping", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a person is snoring loudly", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "dishes cling together then a man begins to speak"], "sample_ids": ["uC9dtII1KDI", "sQGXqGcwOTc"], "start_seconds": ["150", "3"], "properties": ["wind, gusts, distance", "cling, speak, dishes"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "mechanisms are operating and water is splashing "], "question": "which entity is about a woman speaking in the distance?", "label": 0}, {"captions": ["paper is repeatedly crumpled and crinkled", "a man speaks followed by another man speaking outside"], "sample_ids": ["vms5XGTDVQc", "viuTg1M-dqg"], "start_seconds": ["220", "30"], "properties": ["paper, crumpled, crinkled", "two men, speak, follow"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["paper is crumpled and crinkled", "a man is speaking with background 
noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "an airplane engine spools and people speak"], "sample_ids": ["vs65y4qmyBE", "wTjoRj1se3U"], "start_seconds": ["340", "390"], "properties": ["engine, run, man", "airplane, engine, spool"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a jet engine is running and people are talking"], "question": "which entity is a machine?", "label": 0}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "a horn rings out as a machine runs by"], "sample_ids": ["t69a8aRKhmc", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["a, b, c", "a, horn, run"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity has a horn?", "label": 1}, {"captions": ["birds vocalize and a man speaks", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["v0wPrLBI3hg", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["vocalize, bird, speak", "gun, shoot, water"], "captions_pred_video": ["footage of the pigeons feeding on the ground", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["pigeons coo and flap wings while a man speaks ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "people cheer as a vehicle engine revs"], "sample_ids": ["uYT5gxnyMWM", "xjhAnI2q6hM"], 
"start_seconds": ["50", "6"], "properties": ["female, spraying, scream", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a person sniffs and sneezes", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["uRlbY6aoBU", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["sneezes, person, sniffs", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is sneezing ", "a man is speaking and ducks are quacking"], "question": "which entity is a person", "label": 0}, {"captions": ["a man talks while a clock does ticktock", "vehicles pass by on a roadway"], "sample_ids": ["spYNpeN7rPY", "tgbONvsP47Y"], "start_seconds": ["1", "0"], "properties": ["a clock, ticktock, man", "pass, vehicle, roadway"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a man speaks followed by another man speaking outside"], "sample_ids": ["sfAvvZwdLCY", "viuTg1M-dqg"], "start_seconds": ["20", "30"], "properties": ["flushes, drains, water", "two men, speak, follow"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker?", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["yYJksgsxx5U", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["audio, woman, silverware", "People, motor, 
brakes"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", null], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a woman speaking over chopping and silverware noises?", "label": 0}, {"captions": ["someone snores nearby", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["spJCm8tD9Zo", "yajyRTUQk3U"], "start_seconds": ["90", "400"], "properties": ["someone snores, nearby, someone", "a woman, something, fried"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tDlysoZiA1I", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["animal, grunt, multiple", "applause, audience, yells"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "a horse runs while two women talk"], "sample_ids": ["s3cTDAj31g", "sdvI1mHAsc"], "start_seconds": ["80", "20"], "properties": ["man, talk, woman", "two women, horse, run"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a baby is crying", "horses clip-clop and a woman speaks"], "question": "which entity has more people", "label": 1}, {"captions": ["an airplane engine roars increasingly 
louder", "a child speaks in closed space"], "sample_ids": ["vBslzh7saPw", "yW6FWLSLkx4"], "start_seconds": ["90", "40"], "properties": ["engine, roar, louder", "child, space, speak"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["vZAw4apG0Es", "tDlysoZiA1I"], "start_seconds": ["30", "0"], "properties": ["background, clock, ticktocks", "animal, grunts, chirps"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a clock is ticking and people are talking", "birds are chirping and a rooster is crowing "], "question": "which entity is more animal like", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["xfudFO976zE", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["animal, bleats, cry", "a woman, something, fried"], "captions_pred_video": ["footage is blurry and shaky", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["zdYdyF9-m8U", "yDoT73BWsdA"], "start_seconds": ["7", "10"], "properties": ["wind, crash, shoreline", "engine, revs, vehicle"], "captions_pred_video": ["a person kayaking in the ocean 
near a cliff", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["waves crash and wind blows ", "a race car accelerates and revs its engine "], "question": "which entity is stationary", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["y8WEcpOlT3I", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["harsh, wind, blows", "three men, wind, flow"], "captions_pred_video": ["on how to use a sewing machine youtube", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people", "label": 1}, {"captions": ["a man speaks as horns blow", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["tHyNqRyK34A", "wqZ135Ssz0"], "start_seconds": ["24", "60"], "properties": ["a, man, speaks", "two men, woman, birds"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", null], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["vddP56-ogds", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["water, flow, laugh", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 
the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["w34HjHr6gAY", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["beeps, squawk, child speaking", "motor noise, horn, siren"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn honking?", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["uoGVs9yUqY4", "yDoT73BWsdA"], "start_seconds": ["30", "10"], "properties": ["multiple, vocalize, wind", "engine, revs, vehicle"], "captions_pred_video": ["for how to make a wooden shed door youtube", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a race car accelerates and revs its engine "], 
"question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["y8WEcpOlT3I", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["harsh, wind, blows", "background, motor, run"], "captions_pred_video": ["on how to use a sewing machine youtube", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["bees buzz and wind blows", "a child speaks in closed space"], "sample_ids": ["tMJne1a4AFI", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["bees buzz, wind blows, bees", "child, space, speak"], "captions_pred_video": ["a swarm of bees on the ground", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a swarm of bees buzzing around", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["motors runs briefly and tires screech", "wind blowing followed by a zoom"], "sample_ids": ["yRx9txMcBl0", "vr8ZXjEBhMQ"], "start_seconds": ["40", "150"], "properties": ["motors, tires, screech", "wind, blow, zoom"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 
mods cars, gta 5 mods cars, gta 5 mods cars, gta", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a car is revving its engine and skidding ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom?", "label": 1}, {"captions": ["a rumbling clap in the distance followed by a horn and the rumbling grows louder", "a clock ticktocks"], "sample_ids": ["slZLHwNbbt4", "v-g-j2uTByM"], "start_seconds": ["300", "30"], "properties": ["clap, distance, horn", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["birds chirp in the background while a horse neighs followed by a girl speaking", "water flows and trickles"], "sample_ids": ["s59PfAghdkM", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["bird, chirp, background, horse, neigh", "water, flow, trickle"], "captions_pred_video": ["is an anime scene featuring two people and a horse in the foreground and a fence in the background", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["birds are chirping a horse is neighing and a woman is speaking ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a toilet flushes and water sputters as it drains", "a propeller rotates loudly and intensely"], "sample_ids": ["smGI3C1NZc", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["water, drain, toilet", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a toilet is flushed", "a helicopter is flying overhead "], "question": "which entity is louder", 
"label": 1}, {"captions": ["someone is burping continuously", "multiple people speak and children yell while water gurgles"], "sample_ids": ["y636gklDioE", "vb1fPSDI4c"], "start_seconds": ["20", "30"], "properties": ["burps, burps, burps", "multiple, people, yell"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", null], "captions_pred_audio": ["a person burps loudly several times", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vqZuVbG6-HI", "y8WEcpOlT3I"], "start_seconds": ["130", "40"], "properties": ["background, male, female", "harsh, wind, blows"], "captions_pred_video": ["footage is blurry because it's raining outside", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man is speaking with wind noise in the background "], "question": "which entity has a harsh wind blowing?", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "females talk and laugh over gusting wind"], "sample_ids": ["xvDdE3zNf8Y", "un9VQlzgZM"], "start_seconds": ["120", "5"], "properties": ["A, crumple, paper", "females, talk, laugh"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", null], "captions_pred_audio": ["a woman speaks and crumples paper", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is a group of people", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vlS6YMeWAPo", "tdWhHV3X25Q"], "start_seconds": ["40", "60"], "properties": ["sheep, baa, birds", "applause, audience, yells"], 
"captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a goat bleats and birds chirp", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a man speaks as a car is passing by"], "sample_ids": ["u--KhUW8l1Y", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["horn, siren, life", "a, car, pass"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["tQWGZLItBXk", "y8WEcpOlT3I"], "start_seconds": ["170", "40"], "properties": ["music, person, ding", "harsh, wind, blows"], "captions_pred_video": ["worms revolution screenshots", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a man is speaking with wind noise in the background "], "question": "which entity has a harsh wind blowing?", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "small dogs yip and bark sharply"], "sample_ids": ["sa6TLVbooCc", "v-wcQf4BDY0"], "start_seconds": 
["240", "120"], "properties": ["people, laugh, child", "bark, yip, sharply"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["y2ZBGpgbhHM", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["animal, growl, bird", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["birds chirping and a dog panting", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a duck quacks continuously"], "sample_ids": ["xKB8O8LTs6s", "vh30P49Po6s"], "start_seconds": ["70", "30"], "properties": ["music, gunfire, explosion", "quacks, continuously, duck"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a duck is quacking loudly"], "question": "which entity is a duck?", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["rqfQRErjfk8", "yajyRTUQk3U"], "start_seconds": ["170", "400"], "properties": ["crowd, cheers, applauds", "a woman, something, fried"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a woman is speaking while food is frying in the 
background"], "question": "which entity is a demonstration of cooking?", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a bird chirps in response to a woman chirping for the birds"], "sample_ids": ["zl9Dqx-j7q4", "uOpoD0gGXcs"], "start_seconds": ["6", "120"], "properties": ["motors rev, laugh, loudly", "chirps, woman, bird"], "captions_pred_video": ["footage of a man driving a car in the dark", "a herd of cows grazing in the field"], "captions_pred_audio": ["a jet engine roars ", "birds are chirping and a man is speaking"], "question": "which entity is a response to a human action", "label": 1}, {"captions": ["an airplane flies overhead as a woman speaks", "water pouring and bubbling"], "sample_ids": ["zj2R0XoFr5k", "uyRfq-jKPpo"], "start_seconds": ["50", "50"], "properties": ["airplane, fly, overhead", "water, bubbles, pouring"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a clock 
ticktocks"], "sample_ids": ["zgUgkpk78xU", "v-g-j2uTByM"], "start_seconds": ["70", "30"], "properties": ["clinking, humming, horn", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "vehicles pass by on a roadway"], "sample_ids": ["zY3icUyMdh8", "tgbONvsP47Y"], "start_seconds": ["20", "0"], "properties": ["dog, bark, engine", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a car is driving on the road "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zTLVJCo4WEE", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["a, crickets, sing", "male, duck, laugh"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and crickets chirp", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking?", "label": 1}, {"captions": ["an engine starts and increases in power", "roadway noise occurs and a truck accelerates"], "sample_ids": ["zjTG0gaGCUI", "tgbONvsP47Y"], "start_seconds": ["80", "0"], "properties": ["power, 
increase, engine", "noise, truck, accelerate"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "a motor slows to a stopover traffic noises"], "sample_ids": ["zcDwZ6W7E3E", "zofjfKhqLk8"], "start_seconds": ["180", "10"], "properties": ["man, speak, motorcycles", "noise, stop, motor"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a man using a machine to cut a piece of wood"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a large engine is running and a bell is ringing"], "question": "which is a stopover", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a telephone rings followed by a woman talking"], "sample_ids": ["vzxHnu-SFEw", "tGcFnX0GHI"], "start_seconds": ["80", "0"], "properties": ["two objects, woman, speak", "ring, talk, woman"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a dial tone sounds followed by a woman speaking"], "question": "which woman is talking", "label": 1}, {"captions": ["a man talks while vehicles pass by", "small dogs yip and bark sharply"], "sample_ids": ["sK4u5T8hW78", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["a, man, talk", "bark, yip, sharply"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["uEU-Hg5MTN8", "wDVMhEdTiVw"], "start_seconds": ["27", "30"], "properties": ["animal, grunts, snorts", "gun, shoot, water"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause injury", "label": 1}, {"captions": ["paper folding and crinkling", "a child speaks in closed space"], "sample_ids": ["zPpG3RD8lSs", "yW6FWLSLkx4"], "start_seconds": ["20", "40"], "properties": ["paper, fold, crinkle", "child, space, speak"], 
"captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is not a child?", "label": 0}, {"captions": ["some men converse over an engine running", "someone is typing on a computer keyboard"], "sample_ids": ["sCiy7QS1U", "v0x1odnXtP0"], "start_seconds": ["300", "210"], "properties": ["men, converse, engine", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a person is typing on a keyboard"], "question": "which is a more active activity", "label": 0}, {"captions": ["a kid speaks followed by music playing", "a large crowd cheers and applauds"], "sample_ids": ["tQWGZLItBXk", "rqfQRErjfk8"], "start_seconds": ["170", "170"], "properties": ["music, kid, speak", "crowd, cheers, applauds"], "captions_pred_video": ["worms revolution screenshots", "a man hugging another man in front of an orchestra"], "captions_pred_audio": 
["a child speaks music plays video game sounds sound effects and sound effects play ", "a crowd of people clapping and cheering"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["tOj4tdLRaA", "su6FAOcOA8c"], "start_seconds": ["70", "4"], "properties": ["woman, laugh, baby", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman is speaking and a subway train is moving "], "question": "which woman is speaking", "label": 1}, {"captions": ["a child speaks in closed space", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["yW6FWLSLkx4", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["child, space, speak", "a, scream, girl"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity is a girl?", "label": 1}, {"captions": ["children cry and people talk", "music plays, a person speaks, followed by whooshes and a ding"], "sample_ids": ["xLwHe825Zs", "tQWGZLItBXk"], "start_seconds": ["18", "170"], "properties": ["people talk, children cry, people talk", "music, person, ding"], "captions_pred_video": [null, "worms revolution screenshots"], "captions_pred_audio": ["a baby cries and a woman speaks", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity has a person speaking?", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "wind blows as people chatter quietly"], "sample_ids": ["x6ijhqRY38s", "xBxDz0CFVn0"], "start_seconds": 
["250", "30"], "properties": ["bowl, silverware, man", "wind, chatter, people"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["food is frying then a woman speaks", "a man woman speak while crickets sing"], "sample_ids": ["ukxt9I7eMMg", "zTLVJCo4WEE"], "start_seconds": ["30", "30"], "properties": ["food, woman, speak", "a, crickets, sing"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "- a boy with a rifle aiming at a target"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a woman speaks and crickets chirp"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a toilet flushes and a female speaks"], "sample_ids": ["tDlfY3nmx1A", "yaln9y8I7ms"], "start_seconds": ["160", "230"], "properties": ["applause, laugh, man", "female, flushes, toilet"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "footage is blurry and out of focus"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a toilet flushes and a man speaks"], "question": "which entity is a video of a toilet flushing?", "label": 1}, {"captions": ["a rumble grows louder", "a stream of water runs briefly"], "sample_ids": ["y4MY9mp8-TA", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["loudness, increase, rumble", "stream, water, run"], "captions_pred_video": ["a helicopter flying in the sky", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a helicopter flies overhead ", "a car is driving on a wet road "], "question": "which 
entity is a stream of water?", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "a person whistles a meandering tune"], "sample_ids": ["t25U-v4k4ts", "uFoga8sHpiw"], "start_seconds": ["40", "90"], "properties": ["bees buzz, birds chirp, man speaks", "person, tune, whistle"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage of a bird in a cage"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a person whistles a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "waves crash against a shoreline and people speak"], "sample_ids": ["tjmoSi330GM", "yFB25fqfU8I"], "start_seconds": ["23", "300"], "properties": ["speed, water, boat", "wave, crash, shoreline"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which is moving slower", "label": 0}, {"captions": ["birds chirp and an insect buzzes around", "several insects fly while two men talk"], "sample_ids": ["t97k0cejSQE", "s-T9OVOiMLo"], "start_seconds": ["250", "330"], "properties": ["bird, chirp, insect", "several, fly, men"], "captions_pred_video": ["a bee on a purple thistle flower", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more insects", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "a man speaks as a motor runs in the background"], "sample_ids": ["sHbXC6na9hg", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["a person, saw, wood", "background, motor, run"], "captions_pred_video": ["a man using a 
tractor to cut a log into firewood youtube", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["an engine is idling and vibrating", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["an airplane engine runs", "a duck quacks continuously"], "sample_ids": ["yVPZ2MNWpms", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["engine, airplane, runs", "quacks, continuously, duck"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a car is driving by on the road ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "plastic is tapped on while someone speaks"], "sample_ids": ["xO-Q2BlIIPU", "wvKpEYswXO0"], "start_seconds": ["30", "150"], "properties": ["two men, exclamation, speak", "plastic, tap, speak"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is about a man speaking to another man?", "label": 0}, {"captions": ["a woman speaks in a fast tone with a male", "a stream of water flows as people talk and wind blows"], "sample_ids": ["sTpirNYo8vQ", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["a, tone, fast", "stream, water, flow"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking with wind noise in the background "], "question": "which entity is 
moving", "label": 1}, {"captions": ["a male speaks over some small clicks", "a telephone rings followed by a woman talking"], "sample_ids": ["uXxVebHsGZ8", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["male, clicks, speak", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "some men converse over an engine running"], "sample_ids": ["shmR4OZtzqA", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["man, engine, idle", "men, converse, engine"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", null], "captions_pred_audio": ["a man speaks while a motor runs", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows a man speaking as a vehicle engine idles?", "label": 0}, {"captions": ["a person screams glaringly", "a train horn blows as it passes by"], "sample_ids": ["xC8kbrKJmco", "zVacuqSb4LI"], "start_seconds": ["0", "30"], "properties": ["glaringly, screams, person", "horn, blows, train"], "captions_pred_video": [null, "by mike amstong a video by mike amstong a video by mike amstong a video by 
mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a goat is bleating ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows strongly", "waves crash against a shoreline and people speak"], "sample_ids": ["w8uLijTqtlU", "yFB25fqfU8I"], "start_seconds": ["70", "300"], "properties": ["wind, blows, strongly", "wave, crash, shoreline"], "captions_pred_video": ["footage is blurry and shaky", "footage of a person surfing in the ocean"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more powerful", "label": 1}, {"captions": ["bees buzz and wind blows", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tMJne1a4AFI", "uZesmtKZGSw"], "start_seconds": ["0", "250"], "properties": ["bees buzz, wind blows, bees", "men, talk, cars"], "captions_pred_video": ["a swarm of bees on the ground", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["vb1fPSDI4c", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["multiple, people, yell", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a weapon fires multiple times", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sMC07Ucy7kg", "zFjIWfSD-4"], "start_seconds": ["10", "410"], "properties": ["weapon, fire, multiple", "People, motor, brakes"], "captions_pred_video": ["footage is from a car's point of view", null], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a 
train blows a horn", "an engine runs loudly"], "sample_ids": ["wnpJndXuxLc", "vqZuVbG6-HI"], "start_seconds": ["50", "130"], "properties": ["blows, vehicle, train", "loud, engine, run"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a lawn mower is running and men are speaking "], "question": "which is louder", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tw76HGONaKg", "uZesmtKZGSw"], "start_seconds": ["570", "250"], "properties": ["music, click, man", "men, talk, cars"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["goats bleat and metal clings", "pigeons vocalize and birds chirp"], "sample_ids": ["tH17JPjDPnc", "uiS58TNyUiw"], "start_seconds": ["260", "430"], "properties": ["bleat, metal, clings", "vocalize, bird, chirp"], "captions_pred_video": ["feed of the goats eating hay in the barn", "of the pigeon in the cage"], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a man is speaking and a bee is buzzing"], "question": "which animal is vocalizing", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a woman talks while a baby cries and a man whispers"], "sample_ids": ["sG7TyPnFDR0", "smDKStoHBJo"], "start_seconds": ["180", "0"], "properties": ["beeps, machine, smoke alarm", "a, talk, baby, cry"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "a man holding a crying baby in his arms"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a baby is crying and a woman is speaking"], "question": "which entity has a baby?", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a man speaks as a car is passing by"], "sample_ids": ["vZAw4apG0Es", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["background, tick, repeat", "a, car, pass"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a moving object", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "long loud burping by a man"], "sample_ids": ["uzQnlJXBbOM", "xmiUIOhtZyQ"], "start_seconds": ["50", "60"], "properties": ["ringing, beep, stop", "loud, burp, man"], "captions_pred_video": ["footage of a person using a cell phone on a table", "homer simpson drinking a beer"], "captions_pred_audio": ["a telephone rings and a man speaks", "a person burps and music plays in the background "], "question": "which is louder", "label": 1}, {"captions": ["someone sprays a liquid onto a hard surface making a hiss sound", "a person snores loudly multiple times at a close distance"], "sample_ids": ["zO-LSSY92ZM", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["liquid, surface, sound", "loud, multiple, distance"], "captions_pred_video": ["youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's 
air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'", null], "captions_pred_audio": ["steam is hissing and hissing", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a clock ticktocks continuously", "wind blows as people chatter quietly"], "sample_ids": ["vlJS7LN2XyM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks continuously", "wind, chatter, people"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage is blurry and out of focus"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a train engine runs and a horn blows", "several insects fly while two men talk"], "sample_ids": ["zPX9o1uDiI", "s-T9OVOiMLo"], "start_seconds": ["40", "330"], "properties": ["engine, horn, run", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a moving object", "label": 0}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["yZmhM1HcsyE", "vbZ-0lGPneg"], "start_seconds": ["4", "30"], "properties": ["engine, roar, water", "a woman, a television program, a bird"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a motorboat speeds 
through water with wind noise in the background ", "a woman is speaking and a dog is whimpering"], "question": "which entity is more quiet", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a duck quacks continuously"], "sample_ids": ["sxYkFKFIZD0", "vh30P49Po6s"], "start_seconds": ["20", "30"], "properties": ["screech, man, door", "quacks, continuously, duck"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "birds chirp in the background while a horse neighs followed by a girl speaking"], "sample_ids": ["vs65y4qmyBE", "s59PfAghdkM"], "start_seconds": ["340", "0"], "properties": ["engine, run, man", "bird, chirp, background, horse, neigh"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "is an anime scene featuring two people and a horse in the foreground and a fence in the background"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "birds are chirping a horse is neighing and a woman is speaking "], "question": "which entity has a horse neighing?", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "some men converse over an engine running"], "sample_ids": ["x5cuQjOdM3E", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["cat, meows, young woman", "men, converse, engine"], "captions_pred_video": ["a black 
background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more active", "label": 1}, {"captions": ["a man talks while vehicles pass by", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sK4u5T8hW78", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["a, man, talk", "gun, shoot, water"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used for hunting", "label": 1}, {"captions": ["a stream runs then someone speaks", "a woman sneezes then speaks"], "sample_ids": ["wbHTKEJZyhc", "x4dZyf9Gbj0"], "start_seconds": ["20", "130"], "properties": ["stream, run, someone", "sneezes, speaks, woman"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river 
in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "footage is blurry and out of focus"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a woman sneezes and speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a person sniffles and sneezes", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["uRlbY6aoBU", "w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["sneezes, sniffles, person", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is sneezing ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["s4Uz1Ffgo04", "vbZ-0lGPneg"], "start_seconds": ["100", "30"], "properties": ["roars, background, people speaking", "a woman, a television program, a bird"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a woman is speaking and a dog is whimpering"], "question": "which entity is more quiet", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "paper is crumpling 
consistently"], "sample_ids": ["w8uLijTqtlU", "v5cSxLaHADY"], "start_seconds": ["70", "0"], "properties": ["wind, microphone, noise", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage is blurry and shaky", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["the wind is blowing strongly", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["an insect buzzes around continuously", "three men talk while wind blows and some liquid flows"], "sample_ids": ["v25l1jef3JY", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["buzzes, continuously, insect", "three men, wind, flow"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a person", "label": 0}, {"captions": ["a young woman speaks over spraying and another person yells", "dishes cling together then a man begins to speak"], "sample_ids": ["uYT5gxnyMWM", "sQGXqGcwOTc"], "start_seconds": ["50", "3"], "properties": ["person, spray, yell", "cling, speak, dishes"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "mechanisms are operating and water is splashing "], "question": "which entity is about a person speaking?", "label": 0}, {"captions": ["a vehicle engine runs while a siren and horn sound", "people applaud and hoot and chat quietly"], "sample_ids": ["u--KhUW8l1Y", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["engine, sound, horn", "people, applaud, hoot"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", null], "captions_pred_audio": ["a 
fire truck siren blares and a horn blows ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["uRExseg-0XI", "vfYTJq7nU"], "start_seconds": ["210", "130"], "properties": ["woman, man, water", "rustling, ducks, quack"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", null], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a duck quacks and a woman speaks"], "question": "which entity has more water", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a telephone rings followed by a woman talking"], "sample_ids": ["zY3icUyMdh8", "tGcFnX0GHI"], "start_seconds": ["20", "0"], "properties": ["dog, bark, engine", "ring, talk, woman"], "captions_pred_video": ["footage of a bus driving through a residential street at night", null], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "a clock alarm sounds and gears turn"], "sample_ids": ["zALy31PjDl0", "w2M4i1mklOA"], "start_seconds": ["21", "30"], "properties": ["a man, a vehicle, a horn", "alarm, gears, turn"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage of an antique clock"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a clock is ticking and a bell is ringing "], "question": "which entity is a clock?", "label": 1}, {"captions": ["a machine runs continuously", "various birds chirp and squeal, and an animal 
grunts"], "sample_ids": ["wdXV3Pv0jiY", "tDlysoZiA1I"], "start_seconds": ["11", "0"], "properties": ["machine, running, continuously", "animal, grunts, chirps"], "captions_pred_video": ["footage is blurry and shaky", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "birds are chirping and a rooster is crowing "], "question": "which entity is not a machine?", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["shmR4OZtzqA", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["man, engine, idle", "rooster, crow, background, men"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man speaks while a motor runs", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "animals bleat and moo as a person speaks"], "sample_ids": ["vddP56-ogds", "tPJvjq9QePY"], "start_seconds": ["30", "40"], "properties": ["liquid, laughs, man", "animal, bleat, moo"], 
"captions_pred_video": [null, "a dog and a sheep in a barn"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a baby cries and a man speaks"], "question": "which entity is more animal-like", "label": 1}, {"captions": ["people speak as gunfire rings out", "a man speaks as a car is passing by"], "sample_ids": ["wqTCwqVRDlk", "sK4u5T8hW78"], "start_seconds": ["80", "30"], "properties": ["gunfire, ring, speak", "a, car, pass"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a man is speaking with background noise and breathing sounds "], "question": "which entity is more calm", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["sa6TLVbooCc", "yDoT73BWsdA"], "start_seconds": ["240", "10"], "properties": ["people, laugh, child", "engine, revs, vehicle"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sDSppXIlJrs", "sSMl2vc3ek"], "start_seconds": ["27", "20"], "properties": ["microphone, water, wind", "loud, multiple, 
distance"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", null], "captions_pred_audio": ["the wind is blowing and water is splashing", "a person snoring loudly"], "question": "which is louder", "label": 1}, {"captions": ["someone is snoring while sleeping", "a man speaks followed by another man speaking outside"], "sample_ids": ["ujMt0-D-x2k", "viuTg1M-dqg"], "start_seconds": ["0", "30"], "properties": ["snore, sleep, someone", "two men, speak, follow"], "captions_pred_video": ["of the dog playing with a toy on the floor", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity shows two men speaking?", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "some tunes played by whistling"], "sample_ids": ["xvDdE3zNf8Y", "u6BnG6YZqJ4"], "start_seconds": ["120", "0"], "properties": ["a, female, speaks", "tune, play, whistling"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman speaks and crumples paper", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "a stream of water runs briefly"], "sample_ids": ["t25U-v4k4ts", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["bees buzz, birds chirp, man speaks", "stream, water, run"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "a man speaks over a radio as wind blows and water 
splashes"], "sample_ids": ["t97k0cejSQE", "tDVADusiIoc"], "start_seconds": ["250", "60"], "properties": ["bird, chirp, insect", "water, radio, man"], "captions_pred_video": ["a bee on a purple thistle flower", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["t25U-v4k4ts", "vlS6YMeWAPo"], "start_seconds": ["40", "40"], "properties": ["a, chirps, bird", "sheep, baa, birds"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a goat bleats and birds chirp"], "question": "which entity has more animals", "label": 1}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "birds chirp in the background while a horse neighs followed by a girl speaking"], "sample_ids": ["wvKpEYswXO0", "s59PfAghdkM"], "start_seconds": ["150", "0"], "properties": ["sound, water, running", "bird, chirp, background, horse, neigh"], "captions_pred_video": ["of the person preparing food in the kitchen", "is an anime scene featuring two people and a horse in the foreground and a fence in the background"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "birds are chirping a horse is neighing and a woman is speaking "], "question": "which entity has a horse neighing?", "label": 1}, {"captions": ["continuous sneezing together with speech", "a woman speaks happily and an animal chirps"], "sample_ids": ["x4dZyf9Gbj0", "uWAAAL4CIoc"], "start_seconds": ["130", "0"], "properties": ["continuous, sneeze, speech", "a woman, chirps, 
animal"], "captions_pred_video": ["footage is blurry and out of focus", null], "captions_pred_audio": ["a woman sneezes and speaks", "a woman is speaking and a dog is barking "], "question": "which entity is more like a bird", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["y8WEcpOlT3I", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["harsh, wind, blows", "rustling, ducks, quack"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a duck quacks and a woman speaks"], "question": "which entity is about a harsh wind blowing?", "label": 0}, {"captions": ["heavy rain splashes as it falls", "vehicles pass by on a roadway"], "sample_ids": ["wP8ZKrlx3oA", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["fall, rain, splash", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["children cry and people talk", "a telephone rings followed by a woman talking"], "sample_ids": ["xLwHe825Zs", "tGcFnX0GHI"], "start_seconds": ["18", "0"], "properties": ["people talk, children cry, people talk", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby cries and a woman speaks", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["a child yells and another yells", "a toilet flushes and water drains"], "sample_ids": ["vMDHu7Lxcgw", "sfAvvZwdLCY"], "start_seconds": ["410", 
"20"], "properties": ["two, yell, child", "water drains, flushes, water"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["birds chirp as a bell rings", "water flows as men speak and yell"], "sample_ids": ["ziUT9IFTkjg", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["chirp, bell, ring", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["an airplane engine runs", "an insect buzzes around continuously"], "sample_ids": ["yVPZ2MNWpms", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["engine, airplane, runs", "buzzes, continuously, insect"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a car is driving by on the road ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vcmWSmvti8", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["music, man, fire", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more liquid flowing", "label": 1}, {"captions": ["food is frying and 
sizzles", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zNRChLjqcU", "xKB8O8LTs6s"], "start_seconds": ["220", "70"], "properties": ["food is frying, sizzles, food", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["water is running from a faucet into a sink", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a movie?", "label": 1}, {"captions": ["some tunes played by whistling", "water runs from a faucet while some men speak and the water runs in the sink"], "sample_ids": ["u6BnG6YZqJ4", "vzceMbklWc"], "start_seconds": ["0", "180"], "properties": ["tune, play, whistling", "water, faucet, sink"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", null], "captions_pred_audio": ["a person whistling a song", "water is running and a man is speaking"], "question": "which entity is a video of a person playing a tune?", "label": 0}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "dishes cling together then a man begins to speak"], "sample_ids": ["uqFtmnhuqA8", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["a, b, c", "cling, speak, dishes"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "mechanisms are operating and water is splashing "], "question": "which entity is about a clock?", "label": 0}, {"captions": ["a man speaks as crickets sing", "an adult speaks and is typing on a computer keyboard"], "sample_ids": ["ryFDPxgDOGc", "x9JovgqUcs"], "start_seconds": ["570", "500"], "properties": ["a, crickets, sing", "An adult is speaking, typing, and using a computer keyboard"], "captions_pred_video": ["a group of people 
dressed in camouflage and hunting gear in the dark", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man speaks and types on a keyboard"], "question": "which entity is speaking", "label": 1}, {"captions": ["a child speaks", "a man is snoring loudly and repeatedly"], "sample_ids": ["yW6FWLSLkx4", "sncRqQ67iJU"], "start_seconds": ["40", "460"], "properties": ["a, child, speaks", "loud, repeatedly, man"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a person is snoring"], "question": "which entity is louder", "label": 1}, {"captions": ["a person sniffles and sneezes", "water is sprayed across a hard surface"], "sample_ids": ["uRlbY6aoBU", "sQwlkXjQabo"], "start_seconds": ["0", "10"], "properties": ["sneezes, sniffles, person", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is sneezing ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a person is snoring while sleeping", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vJrjSeP17yE", "w5W5Kqtc8E"], "start_seconds": ["40", "100"], "properties": ["a person is sleeping, snoring, person", "wind, blow, vehicle"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a door slams shut roughly", "water flows as men speak and yell"], "sample_ids": ["zkKdxzNC97Y", "vJ7JPEFhyLA"], "start_seconds": ["27", "16"], "properties": ["a door, slams, shut", "water, flow, men"], "captions_pred_video": 
["footage of the door opening and closing in slow motion", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 0}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a man speaks while rain falls onto a hard surface"], "sample_ids": ["wvKpEYswXO0", "wqN6IIHw3po"], "start_seconds": ["150", "30"], "properties": ["sound, water, running", "rain, surface, fall"], "captions_pred_video": ["of the person preparing food in the kitchen", "in your own words what is happening in this screenshot? blood splattered all over the place"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking and water is splashing"], "question": "which entity has a hard surface?", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["xV7Mg1QucSc", "tdWhHV3X25Q"], "start_seconds": ["14", "60"], "properties": ["alarm, ticktocks, laughs", "applause, audience, yells"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a person screams glaringly", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["xC8kbrKJmco", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["glaringly, screams, person", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a goat is bleating ", "a woman is speaking and a baby is crying"], 
"question": "which entity is a person?", "label": 0}, {"captions": ["a train horn blows as it passes by", "a car speeding up in the distance"], "sample_ids": ["zVacuqSb4LI", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["horn, blows, train", "distance, car, speed"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", null], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["xjvTpk2Zpr8", "tDVADusiIoc"], "start_seconds": ["70", "60"], "properties": ["wind, blows, vehicle", "water, radio, man"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a vehicle racing past?", "label": 0}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "pigeons vocalize and birds chirp"], "sample_ids": 
["xyL9F5VrjkE", "uiS58TNyUiw"], "start_seconds": ["20", "430"], "properties": ["wind, motor, distance", "vocalize, bird, chirp"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "of the pigeon in the cage"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a clock ticktocks continuously", "propeller rearing loudly with some male and female voices interspersed in the background"], "sample_ids": ["vlJS7LN2XyM", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["ticktocks, clock, ticktocks continuously", "background, male, female"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a ticktock of a clock", "a lawn mower is running and men are speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["people speak softly as food sizzles", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["yhQ2Lg-7qDY", "zj2R0XoFr5k"], "start_seconds": ["130", "50"], "properties": ["food, sizzle, speak", "airplane, boy, fly"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about to fly", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "someone whistles a tune"], "sample_ids": ["xM4joTqDVp4", "sIXTftIuUgw"], "start_seconds": ["160", "90"], "properties": ["background, chirp, birds", "someone, tune, whistle"], 
"captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", null], "captions_pred_audio": ["birds are chirping and a train is moving ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a stream of water flows quickly", "some men converse over an engine running"], "sample_ids": ["wbHTKEJZyhc", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["stream, water, flow", "men, converse, engine"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", null], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a still image?", "label": 0}, {"captions": ["a man speaking with light rustling", "waves crash against a shoreline and people speak"], 
"sample_ids": ["zOZleIRqZm4", "yFB25fqfU8I"], "start_seconds": ["80", "300"], "properties": ["light, rustling, man", "wave, crash, shoreline"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more calm", "label": 0}, {"captions": ["a man talks followed by a woman shouting", "three men talk while wind blows and some liquid flows"], "sample_ids": ["s3cTDAj31g", "vJ7JPEFhyLA"], "start_seconds": ["80", "16"], "properties": ["man, talk, woman", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vzxHnu-SFEw", "y8WEcpOlT3I"], "start_seconds": ["80", "40"], "properties": ["two objects, woman, speak", "harsh, wind, blows"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors 
youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["an animal growls followed by birds chirping", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["y2ZBGpgbhHM", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["animal, growl, bird", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["birds chirping and a dog panting", "a woman is speaking and a dog is whimpering"], "question": "which entity is a television program?", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a child speaks in closed space"], "sample_ids": ["vbZ-0lGPneg", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["a woman, a television program, a bird", "child, space, speak"], "captions_pred_video": ["of a man holding a baby duck in his hands", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["an engine idles quietly then gradually becomes louder", "small dogs yip and bark sharply"], "sample_ids": ["vbr9mHKc8WM", "v-wcQf4BDY0"], "start_seconds": ["40", "120"], "properties": ["noise, loudness, engine", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see 
what is happening"], "captions_pred_audio": ["an engine is idling", "a dog barks and growls"], "question": "which entity is louder", "label": 1}, {"captions": ["an airplane engine runs", "winds blows roughly as a vehicle races past"], "sample_ids": ["yVPZ2MNWpms", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["engine, airplane, runs", "wind, blows, vehicle"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a car is driving by on the road ", "a jet engine roars and wind blows "], "question": "which entity is moving", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a man speaks as a car is passing by"], "sample_ids": ["sLUnaPT5gM8", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["loud, laughter, intermittent", "a, car, pass"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking to a car passing by?", "label": 1}, {"captions": ["birds chirp and wind blows", "goats bleat and people speak"], "sample_ids": ["sxIvBMSavMQ", "z5iUE5h0EPs"], "start_seconds": ["210", "30"], "properties": ["birds, chirp, wind", "goats bleat, people speak, language"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a 
beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "of the goat in the barn"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a goat bleats and a man speaks"], "question": "which entity is a language", "label": 1}, {"captions": ["a door opens and birds chirp", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["yeFvk9x0wWI", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["door, open, birds", "loud, laughter, intermittent"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["water is sprayed across a hard surface", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sQwlkXjQabo", "vb1fPSDI4c"], "start_seconds": ["10", "30"], "properties": ["water, spray, surface", "multiple, people, yell"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", null], "captions_pred_audio": ["spraying followed by silence", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["dogs barking and whimpering", "an insect buzzes around continuously"], 
"sample_ids": ["tIY7qOV3rEM", "v25l1jef3JY"], "start_seconds": ["0", "0"], "properties": ["barking, whimpering, dog", "buzzes, continuously, insect"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a fly is buzzing around a microphone "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sofxkNWaP0s", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["wind, engine, louder", "men, talk, cars"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about a man speaking and wind blowing as an aircraft engine becomes louder?", "label": 0}, {"captions": ["a man speaks as a motor runs in the background", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["xZepNM9qcRA", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["background, motor, run", "background, birds, rustling"], 
"captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "birds are chirping and a chime is ringing "], "question": "which background is quieter", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["wRBHTgrbiwg", "yks4cLgIDMc"], "start_seconds": ["50", "170"], "properties": ["bird, owl, speak", "background, speaking, child"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background?", "label": 1}, {"captions": ["there are rhythmical snoring nearby", "plastic is tapped on while someone speaks"], "sample_ids": ["ujMt0-D-x2k", "wvKpEYswXO0"], "start_seconds": ["0", "150"], "properties": ["snoring, rhythmical, nearby", "plastic, tap, speak"], "captions_pred_video": ["of the dog playing with a toy on the floor", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["spYNpeN7rPY", "tDVADusiIoc"], "start_seconds": ["1", "60"], "properties": ["a clock, ticktock, man", "water, radio, man"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a clock?", "label": 0}, {"captions": ["a young woman speaks over spraying and another person yells", "paper folding and crinkling"], "sample_ids": ["uYT5gxnyMWM", "zPpG3RD8lSs"], "start_seconds": ["50", "20"], "properties": ["person, spray, yell", "paper, fold, crinkle"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of 
paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of a skill", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "several insects fly while two men talk"], "sample_ids": ["wudZTNBtVqc", "s-T9OVOiMLo"], "start_seconds": ["60", "330"], "properties": ["accelerates, engine, wind", "several, fly, men"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a moving object", "label": 0}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vlJS7LN2XyM", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["background, clocks, ticking", "male, duck, laugh"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and ducks are quacking"], "question": "which entity is more active", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "an insect buzzes around continuously"], "sample_ids": ["sZPuqDgX2V0", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["engine, accelerate, 
intercom", "buzzes, continuously, insect"], "captions_pred_video": [null, "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["an engine runs loudly", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["vqZuVbG6-HI", "ziUT9IFTkjg"], "start_seconds": ["130", "10"], "properties": ["loud, engine, run", "background, birds, rustling"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "birds are chirping and a chime is ringing "], "question": "which is quieter", "label": 1}, {"captions": ["a dark barks and whimpers", "a man talks followed by a woman shouting"], "sample_ids": ["sYj4hpDUZDQ", "s3cTDAj31g"], "start_seconds": ["30", "80"], "properties": ["barks, whimpers, dark", "man, talk, woman"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", null], "captions_pred_audio": ["a dog barks and a cat meows", "a man is speaking and a baby is crying"], "question": "which entity is talking", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wqZ135Ssz0", "wz7N8YRy74I"], "start_seconds": ["60", "30"], "properties": ["man, woman, squawks", "rooster, crow, background, men"], "captions_pred_video": [null, "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a car accelerates and wind blows", 
"paper is crumpling consistently"], "sample_ids": ["u0TrcHhkPQ", "v5cSxLaHADY"], "start_seconds": ["20", "0"], "properties": ["accelerates, wind, blows", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "distant men speak as a spray can nozzle is depressed"], "sample_ids": ["tEE3MpBt1sg", "rwtmaKiCcQU"], "start_seconds": ["50", "30"], "properties": ["drill, something, laugh", "nozzle, depressed, spray can"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "shows a man spraying paint on a wall with a spray gun"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "spraying and people speaking"], "question": "which entity is about a drill?", "label": 0}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "a duck quacks loudly and continuously"], "sample_ids": ["uqFtmnhuqA8", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["a, b, c", "loud, continuous, quacks"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "an airplane engine runs"], "sample_ids": ["xV7Mg1QucSc", "yVPZ2MNWpms"], "start_seconds": ["14", "0"], "properties": ["alarm, ticktocks, laughs", "engine, airplane, runs"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "footage of an airport with planes parked on 
the tarmac"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "some tunes played by whistling"], "sample_ids": ["sShpyu2l4YQ", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["growl, bark, yip", "tune, play, whistling"], "captions_pred_video": ["the puppies are playing with a toy", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a dog is barking and growling", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "an airplane engine spools and people speak"], "sample_ids": ["sa6TLVbooCc", "wTjoRj1se3U"], "start_seconds": ["240", "390"], "properties": ["people, laugh, child", "airplane, engine, spool"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a jet engine is running and people are talking"], "question": "which entity is about a child speaking?", "label": 0}, {"captions": ["an animal bleats and cries out and metal bangs", "water flows and trickles"], "sample_ids": ["xfudFO976zE", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["animal, bleats, cry", "water, flow, trickle"], "captions_pred_video": ["footage is blurry and shaky", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["dogs barking and whimpering", "a infant makes noise and is excited"], "sample_ids": ["tIY7qOV3rEM", "wIJK3-5y0kA"], "start_seconds": ["0", "30"], 
"properties": ["barking, whimpering, dog", "noise, excited, infant"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "a stream of water flows as people talk and wind blows"], "sample_ids": ["yks4cLgIDMc", "xBxDz0CFVn0"], "start_seconds": ["170", "30"], "properties": ["background, speaking, child", "stream, water, flow"], "captions_pred_video": ["footage of two kids wrestling on the floor", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a child is crying", "a man is speaking with wind noise in the background "], "question": "which entity is a stream of water flowing?", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "wind blowing followed by a zoom"], "sample_ids": ["sQGXqGcwOTc", "vr8ZXjEBhMQ"], "start_seconds": ["3", "150"], "properties": ["cling, speak, dishes", "wind, blow, zoom"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["an engine revs and a turning noise is made", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["tOSWIURC-4", "y2bVZ7rz-5M"], "start_seconds": ["0", "280"], "properties": ["noise, engine, revs", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a lawn mower is running ", "a truck is honking its horn and a siren is blaring "], "question": "which noise is made by a 
motor", "label": 1}, {"captions": ["ticking continues without interruption", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["v-g-j2uTByM", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["ticking, continuous, clock", "a, scream, girl"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a clock is ticking loudly", "a woman is speaking and a baby is crying"], "question": "which entity is not continuous", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "several insects fly while two men talk"], "sample_ids": ["uPDn2BFTHk", "s-T9OVOiMLo"], "start_seconds": ["140", "330"], "properties": ["lady, laugh, baby", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["s4Uz1Ffgo04", "ukg5L09Wpvo"], "start_seconds": ["100", "150"], "properties": ["roars, background, people speaking", "a train, a horn, a bell"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a baby cries and a woman speaks", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tMbMDvT50j8", "uYT5gxnyMWM"], "start_seconds": ["12", "50"], "properties": ["a, cry, woman", "female, 
spraying, scream"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a baby cries and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["several beeps are followed by a hit and a woman talking", "a child babbles as a woman speaks"], "sample_ids": ["w34HjHr6gAY", "wEBlkGWVWwE"], "start_seconds": ["30", "260"], "properties": ["beeps, hit, woman", "a, babble, woman"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "shows a person writing on the whiteboard"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a woman is speaking and a child is speaking with background noise and clapping "], "question": "which entity has a woman talking?", "label": 0}, {"captions": ["a train horn sounds and railroad crossing ring", "a child speaks in closed space"], "sample_ids": ["s7knHCFW82w", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["horn, sound, train", "child, space, speak"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is 
speaking", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "a man speaks as a motor runs in the background"], "sample_ids": ["tqR406bGiE", "xZepNM9qcRA"], "start_seconds": ["40", "30"], "properties": ["flush, water, gurgle", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a toilet is flushed", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["birds chirp and wind blows", "birds chirp and objects are moved around"], "sample_ids": ["sxIvBMSavMQ", "yPUYU6t3rwo"], "start_seconds": ["210", "370"], "properties": ["birds, chirp, wind", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "insects buzz and a man speaks"], "question": "which entity is about birds chirping?", "label": 0}, {"captions": ["vehicle engines race around a track as a man commentates", "a man speaks followed by another man speaking outside"], "sample_ids": ["sZPuqDgX2V0", "viuTg1M-dqg"], "start_seconds": 
["30", "30"], "properties": ["commentator, race, track", "two men, speak, follow"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a man speaking followed by another man speaking outside?", "label": 1}, {"captions": ["an airplane engine roars increasingly louder", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["vBslzh7saPw", "su6FAOcOA8c"], "start_seconds": ["90", "4"], "properties": ["engine, roar, louder", "engine, idle, woman"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a woman is speaking and a subway train is moving "], "question": "which engine is quieter", "label": 1}, {"captions": ["a toilet flushes and water drains", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sfAvvZwdLCY", "vYkA3cfXp5Q"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "engine, accelerate, idle"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a toilet is flushed", "an engine is idling"], "question": "which entity is a machine", "label": 1}, {"captions": ["a stream runs then someone speaks", "a car speeding up in the distance"], "sample_ids": ["wbHTKEJZyhc", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["stream, run, someone", "distance, car, speed"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage 
of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", null], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "wind blowing followed by a zoom"], "sample_ids": ["sOa7g-44Dag", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["audio, scratching, man", "wind, blow, zoom"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a video", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "winds blows roughly as a vehicle races past"], "sample_ids": ["sNB8zxXneIM", "xjvTpk2Zpr8"], "start_seconds": ["20", "70"], "properties": ["several, quack, cocks", "wind, blows, vehicle"], "captions_pred_video": ["a group of geese in a cage", "footage of a dhl plane landing on the runway"], 
"captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "a child speaks in closed space"], "sample_ids": ["zsLxS-uLJTw", "yW6FWLSLkx4"], "start_seconds": ["20", "40"], "properties": ["horn, blast, train", "child, space, speak"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["an aircraft engine runs", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["yLCORCnd35Q", "yDoT73BWsdA"], "start_seconds": ["0", "10"], "properties": ["engine, aircraft, runs", "engine, revs, vehicle"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a race car accelerates and revs its engine "], "question": "which entity has a running engine", "label": 0}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "gunshots ring out, a man yells, and more shots follow"], "sample_ids": ["vfYTJq7nU", "vKrYfzleLB8"], "start_seconds": ["130", "110"], "properties": ["ducks, quack, man", "a, ring, gunshots"], "captions_pred_video": [null, "stock footage of a person holding a gun in their hand"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a man is speaking with background noise and a cap gun is fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "a machine beeps continuously"], "sample_ids": 
["vuUVPzd2FXw", "y682ml90jGw"], "start_seconds": ["160", "11"], "properties": ["a, steam, release", "beeps, machine, continuously"], "captions_pred_video": ["of the person cooking on the grill with a spatula", null], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "an insect buzzes around continuously"], "sample_ids": ["sAam2NqGhLY", "v25l1jef3JY"], "start_seconds": ["20", "0"], "properties": ["snoring, breathing, child", "buzzes, continuously, insect"], "captions_pred_video": ["of a little girl sleeping on a couch", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a person is snoring", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["birds chirp and a pop occurs before a man speaks", "some tunes played by whistling"], "sample_ids": ["zuua6-5goWw", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["sound, pop, bird", "tune, play, whistling"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a person whistling a song"], "question": "which entity is a musical 
instrument", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "a child speaks in closed space"], "sample_ids": ["vSeGhaZt-aI", "yW6FWLSLkx4"], "start_seconds": ["50", "40"], "properties": ["water, bubbles, run", "child, space, speak"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "pigeons vocalize and birds chirp"], "sample_ids": ["xvDdE3zNf8Y", "uiS58TNyUiw"], "start_seconds": ["120", "430"], "properties": ["a, female, speaks", "vocalize, bird, chirp"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "of the pigeon in the cage"], "captions_pred_audio": ["a woman speaks and crumples paper", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["several insects fly while two men talk", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["s-T9OVOiMLo", "uZesmtKZGSw"], "start_seconds": ["330", "250"], "properties": ["several, fly, men", "men, talk, cars"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro 
gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "an engine runs loudly"], "sample_ids": ["zY3icUyMdh8", "vqZuVbG6-HI"], "start_seconds": ["20", "130"], "properties": ["dog, bark, engine", "loud, engine, run"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a car accelerates and wind blows", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["u0TrcHhkPQ", "su6FAOcOA8c"], "start_seconds": ["20", "4"], "properties": ["accelerates, wind, blows", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a machine runs continuously", "someone is typing on a computer keyboard"], "sample_ids": ["wdXV3Pv0jiY", "v0x1odnXtP0"], "start_seconds": ["11", "210"], "properties": ["machine, running, continuously", "keyboard, type, computer"], "captions_pred_video": ["footage is blurry and shaky", "how to make money on youtube in spanish"], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a person is typing on a keyboard"], "question": "which is not a machine", "label": 1}, {"captions": ["a person speaks over rustling leaves", "people applaud and hoot and chat quietly"], 
"sample_ids": ["zOZleIRqZm4", "wwyfGO2J4"], "start_seconds": ["80", "90"], "properties": ["rustling, leaves, person", "people, applaud, hoot"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "pigeons vocalize and birds chirp"], "sample_ids": ["ukxt9I7eMMg", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["food, pan, cook", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of the pigeon in the cage"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a person", "label": 1}, {"captions": ["a clock ticktocks continuously", "a infant makes noise and is excited"], "sample_ids": ["vlJS7LN2XyM", "wIJK3-5y0kA"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, ticktocks continuously", "noise, excited, infant"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a ticktock of a clock", "a baby cries and a woman speaks"], "question": "which entity is quieter", "label": 0}, {"captions": ["a person burps loudly for a long time nearby", "an aircraft engine runs as wind blows heavily"], "sample_ids": ["vf44CgrjT0A", "xjvTpk2Zpr8"], "start_seconds": ["20", "70"], "properties": ["loud, long, person", "engine, run, wind"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "footage of a dhl plane landing on the runway"], 
"captions_pred_audio": ["a loud burp", "a jet engine roars and wind blows "], "question": "which entity is running", "label": 1}, {"captions": ["someone whistles a tune", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sIXTftIuUgw", "vJ7JPEFhyLA"], "start_seconds": ["90", "16"], "properties": ["someone, tune, whistle", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person whistling a song", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be a song", "label": 0}, {"captions": ["a helicopter engine runs continuously", "paper is crumpling consistently"], "sample_ids": ["ugHJF0hfYkg", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["engine, running, continuously", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a helicopter is flying overhead ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["sapQIQUhFc", "uYT5gxnyMWM"], "start_seconds": ["280", "50"], "properties": ["water, trickles, flow", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a machine clanks and thumps and a male speaks", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sWZzXuWYY", "xKB8O8LTs6s"], "start_seconds": ["420", "70"], "properties": 
["male, clanks, thumps", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "multiple people speak and children yell while water gurgles"], "sample_ids": ["sa6TLVbooCc", "vb1fPSDI4c"], "start_seconds": ["240", "30"], "properties": ["people, laugh, child", "multiple, people, yell"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", null], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "people speak as gunfire rings out"], "sample_ids": ["yswmmRZFItk", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["background, frog, croak", "gunfire, ring, speak"], "captions_pred_video": ["a close up of a frog in the water", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a frog is croaking", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a vehicle accelerates before a race car idles then accelerates quickly"], "sample_ids": ["zFjIWfSD-4", "sjlVMgdGSK0"], "start_seconds": ["410", "30"], "properties": ["People, motor, brakes", "accelerates, vehicle, race car"], "captions_pred_video": [null, "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": 
["here comes the train and it starts to blow the horn and get close", "a toilet flushes and a female speaks"], "sample_ids": ["s7knHCFW82w", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["blow horn, get close, train", "female, flushes, toilet"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "footage is blurry and out of focus"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a toilet flushes and a man speaks"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "an electric engine works nearby followed by a child talking"], "sample_ids": ["sOa7g-44Dag", "xSKJGCItUWE"], "start_seconds": ["30", "10"], "properties": ["audio, scratching, man", "engine, work, child"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a high pitched engine is running and a child speaks"], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["wind blows as people chatter quietly", "paper is crumpling consistently"], "sample_ids": ["xBxDz0CFVn0", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["wind, chatter, people", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage is blurry and out of focus", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "a motor vehicle roars, drowning out people speaking in the background"], "sample_ids": ["vZAw4apG0Es", "s4Uz1Ffgo04"], "start_seconds": 
["30", "100"], "properties": ["background, tick, repeat", "roars, background, people speaking"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of an ambulance arriving at the scene of an accident"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking while a boat is moving and wind is blowing "], "question": "which entity is louder", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "a car accelerates and wind blows"], "sample_ids": ["spYNpeN7rPY", "u0TrcHhkPQ"], "start_seconds": ["1", "20"], "properties": ["a clock, ticktock, man", "accelerates, wind, blows"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", null], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "a jet 
engine roars, almost making a man inaudible"], "sample_ids": ["w34HjHr6gAY", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["beeps, squawk, child speaking", "loud, jet engine, roar"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "water flows as men speak and yell"], "sample_ids": ["vqZuVbG6-HI", "vJ7JPEFhyLA"], "start_seconds": ["130", "16"], "properties": ["background, male, female", "water, flow, men"], "captions_pred_video": ["footage is blurry because it's raining outside", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more water flowing", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "pigeons vocalize and birds chirp"], "sample_ids": ["ukg5L09Wpvo", "uiS58TNyUiw"], "start_seconds": ["150", "430"], "properties": ["a train, a horn, a bell", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "of the pigeon in the cage"], "captions_pred_audio": ["a train blows its 
whistle and blows its horn ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a duck quacks continuously"], "sample_ids": ["vBHyYJ8pL0", "vh30P49Po6s"], "start_seconds": ["2", "30"], "properties": ["noise, door, opening", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a dark barks and whimpers", "a frog croaks as other frogs croak in the background"], "sample_ids": ["sYj4hpDUZDQ", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["barks, whimpers, dark", "background, frog, croak"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", "a close up of a frog in the water"], "captions_pred_audio": ["a dog barks and a cat meows", "a frog is croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "two women and a man talk while a kid cries"], "sample_ids": ["sWZzXuWYY", "wyllXV6PjKo"], "start_seconds": ["420", "30"], "properties": ["male, speech, banging", "a kid, talk, cry"], "captions_pred_video": [null, null], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman speaks and a baby cries"], "question": "which entity has a kid crying?", "label": 1}, {"captions": ["food is frying and sizzles", "a woman speaks as she rubs two objects together"], "sample_ids": ["zNRChLjqcU", "vzxHnu-SFEw"], "start_seconds": ["220", "80"], "properties": ["food is frying, sizzles, food", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper 
cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["water is running from a faucet into a sink", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is not a person?", "label": 0}, {"captions": ["a man speaks as a machine runs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vD6lYD1l0BY", "uYT5gxnyMWM"], "start_seconds": ["330", "50"], "properties": ["a, machine, run", "female, spraying, scream"], "captions_pred_video": ["game controller being held in the hands of the person", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a woman is speaking and a baby is crying"], "question": "which entity has a female spraying and screaming?", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "water flows and trickles"], "sample_ids": ["x9JovgqUcs", "tB7hWb9gTuQ"], "start_seconds": ["500", "30"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man 
speaks and types on a keyboard", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "an infant crying as a woman laughs"], "sample_ids": ["y8WEcpOlT3I", "xhmRY9yhC7c"], "start_seconds": ["40", "20"], "properties": ["harsh, wind, blows", "a, laugh, infant"], "captions_pred_video": ["on how to use a sewing machine youtube", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["some men converse over an engine running", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sCiy7QS1U", "zj2R0XoFr5k"], "start_seconds": ["300", "50"], "properties": ["men, converse, engine", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["water flows and trickles", "a stream of water flows as people talk and wind blows"], "sample_ids": ["tB7hWb9gTuQ", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["water, flow, trickle", "stream, water, flow"], "captions_pred_video": ["the rocks on the beach are surrounded by water and the sky is visible in the background", "footage is blurry and out of focus"], "captions_pred_audio": ["water is splashing and gurgling", "a man is speaking with wind noise in the background "], "question": "which entity is flowing", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "a man speaks as a motor runs in the background"], "sample_ids": ["sZPuqDgX2V0", "xZepNM9qcRA"], 
"start_seconds": ["30", "30"], "properties": ["engine, accelerate, intercom", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "water pouring and bubbling"], "sample_ids": ["weDbePuc-Xc", "uyRfq-jKPpo"], "start_seconds": ["40", "50"], "properties": ["music, slaps, human", "water, bubbles, pouring"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["humming of idling and revving engine with a man speaking", "a toilet flushes and water drains"], "sample_ids": ["wqADXCzngMw", "sfAvvZwdLCY"], "start_seconds": ["340", "20"], "properties": ["audio, humming, revving", "water drains, flushes, water"], 
"captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a stream of water flows as people talk and wind blows"], "sample_ids": ["ukxt9I7eMMg", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["continuous, woman, speaking", "stream, water, flow"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is a video of a stream of water flowing?", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a woman speaks happily and an animal chirps"], "sample_ids": ["sG7TyPnFDR0", "uWAAAL4CIoc"], "start_seconds": ["180", "0"], "properties": ["beeps, machine, smoke alarm", "a woman, chirps, animal"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", null], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a woman is speaking and a dog is barking "], "question": "which entity is more likely to be a bird?", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["x9JovgqUcs", "xKB8O8LTs6s"], "start_seconds": ["500", "70"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "music, gunfire, explosion"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man speaks and types on a keyboard", "music plays while a woman speaks and 
gunshots are fired "], "question": "which entity is more calm", "label": 0}, {"captions": ["a door opens and birds chirp", "birds chirp quietly and an adult man speaks"], "sample_ids": ["yeFvk9x0wWI", "zuua6-5goWw"], "start_seconds": ["30", "30"], "properties": ["door, open, birds", "birds, chirp, quiet, man, speaks"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "birds are chirping and a man is speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a machine clanks and thumps and a male speaks"], "sample_ids": ["tQWGZLItBXk", "sWZzXuWYY"], "start_seconds": ["170", "420"], "properties": ["voice, music, whoosh", "male, clanks, thumps"], "captions_pred_video": ["worms revolution screenshots", null], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a sewing machine runs and a man speaks"], "question": "which entity has a male speaking?", "label": 1}, {"captions": ["long loud burping by a man", "vehicles pass by on a roadway"], "sample_ids": ["xmiUIOhtZyQ", "tgbONvsP47Y"], "start_seconds": ["60", "0"], "properties": ["loud, burp, man", "pass, vehicle, roadway"], 
"captions_pred_video": ["homer simpson drinking a beer", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a person burps and music plays in the background ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a horn rings out as a machine runs by"], "sample_ids": ["xBxDz0CFVn0", "slZLHwNbbt4"], "start_seconds": ["30", "300"], "properties": ["wind, chatter, people", "a, horn, run"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "dishes cling together then a man begins to speak"], "sample_ids": ["wRV8yMk886E", "sQGXqGcwOTc"], "start_seconds": ["0", "3"], "properties": ["liquid, spray, nozzle", "cling, speak, dishes"], "captions_pred_video": ["two cars are parked in a parking lot at night", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man speaks followed by a loud burst", "mechanisms are operating and water is splashing "], "question": "which entity is about speaking?", "label": 0}, {"captions": ["electronic beeps occur in a short series", "dishes cling together then a man begins to speak"], "sample_ids": ["y682ml90jGw", "sQGXqGcwOTc"], "start_seconds": ["11", "3"], "properties": ["beeps, series, electronic", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a beeping sound is being made ", "mechanisms are operating and water is splashing "], "question": "which entity is a video", "label": 1}, {"captions": ["wind blows and people scream while an engine revs", "a machine beeps continuously"], "sample_ids": 
["w5W5Kqtc8E", "y682ml90jGw"], "start_seconds": ["100", "11"], "properties": ["wind, engine, scream", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["people clap and speak in the distance", "a door slams shut roughly"], "sample_ids": ["wwyfGO2J4", "zkKdxzNC97Y"], "start_seconds": ["90", "27"], "properties": ["clap, distance, speak", "a door, slams, shut"], "captions_pred_video": [null, "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a door is opened and closed"], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "an airplane engine runs"], "sample_ids": ["sapQIQUhFc", "yVPZ2MNWpms"], "start_seconds": ["280", "0"], "properties": ["water, stream, trickles", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a car is driving by on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "a clock ticktocks"], "sample_ids": ["tDlysoZiA1I", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["animal, grunt, multiple", "ticktocks, clock, ticktocks"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a person is snoring while sleeping"], "sample_ids": ["vSeGhaZt-aI", "vJrjSeP17yE"], 
"start_seconds": ["50", "40"], "properties": ["water, sink, talk", "a person is sleeping, snoring, person"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a person snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "a man speaks as a car is passing by"], "sample_ids": ["sSMl2vc3ek", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["loud, multiple, distance", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "someone whistles a tune"], "sample_ids": ["v5P-ThUCINM", "sIXTftIuUgw"], "start_seconds": ["400", "90"], "properties": ["background, chirp, bird", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and birds are chirping", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "wind blowing followed by a zoom"], "sample_ids": ["vBHyYJ8pL0", "vr8ZXjEBhMQ"], "start_seconds": ["2", "150"], "properties": ["noise, door, opening", "wind, blow, zoom"], 
"captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is accompanied by a zoom", "label": 1}, {"captions": ["children speak as a female ask them questions", "a machine beeps continuously"], "sample_ids": ["wEBlkGWVWwE", "y682ml90jGw"], "start_seconds": ["260", "11"], "properties": ["female, speak, questions", "beeps, machine, continuously"], "captions_pred_video": ["shows a person writing on the whiteboard", null], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a beeping sound is being made "], "question": "which entity is not a machine?", "label": 0}, {"captions": ["roadway noise occurs and a truck accelerates", "roadway noise occurs and a truck accelerates"], "sample_ids": ["tgbONvsP47Y", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["noise, truck, accelerate", "noise, truck, accelerate"], "captions_pred_video": ["footage of a fire truck entering a garage", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a car is driving on the road ", "a car is driving on the road "], "question": "which truck accelerates", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "a toilet flushes and a female speaks"], "sample_ids": ["wz7N8YRy74I", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["rooster, crow, background, men", "female, flushes, toilet"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "some men converse over an 
engine running"], "sample_ids": ["wRV8yMk886E", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["liquid, spray, nozzle", "men, converse, engine"], "captions_pred_video": ["two cars are parked in a parking lot at night", null], "captions_pred_audio": ["a man speaks followed by a loud burst", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity shows men speaking and a nozzle spraying liquid?", "label": 0}, {"captions": ["a jet engine screams, then increases its power", "a car speeding up in the distance"], "sample_ids": ["vBslzh7saPw", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["power, scream, increase", "distance, car, speed"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["women speak and laugh as wind blows", "a telephone rings followed by a woman talking"], "sample_ids": ["un9VQlzgZM", "tGcFnX0GHI"], "start_seconds": ["5", "0"], "properties": ["wind, speak, laugh", "ring, talk, woman"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": ["an engine starts and increases in power", "a toilet flushes and a female speaks"], "sample_ids": ["zjTG0gaGCUI", "yaln9y8I7ms"], "start_seconds": ["80", "230"], "properties": ["power, increase, engine", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a toilet flushes and a man speaks"], "question": "which entity is not a toilet?", "label": 0}, {"captions": ["a clock ticktocks continuously", "some men talk among st themselves 
as cars speed and race loudly"], "sample_ids": ["vlJS7LN2XyM", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["ticktocks, clock, ticktocks continuously", "men, talk, cars"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a person is whistling", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["sIXTftIuUgw", "vbZ-0lGPneg"], "start_seconds": ["90", "30"], "properties": ["person, whistling, person", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a person whistling a song", "a woman is speaking and a dog is whimpering"], "question": "which entity has more people", "label": 1}, {"captions": ["a small engine spits as it runs", "water splashes as an animal walks through"], "sample_ids": ["sZvwOuuPGP0", "w1ir-sZ3Im8"], "start_seconds": ["50", "90"], "properties": ["spits, engine, runs", "animal, water, splashes"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of a group of people riding horses 
through a river"], "captions_pred_audio": ["a medium engine is running ", "water splashes and gurgles as people speak"], "question": "which entity is not a person?", "label": 0}, {"captions": ["an engine runs loudly", "men speak and a nozzle sprays liquid"], "sample_ids": ["vqZuVbG6-HI", "wRV8yMk886E"], "start_seconds": ["130", "0"], "properties": ["loud, engine, run", "liquid, spray, nozzle"], "captions_pred_video": ["footage is blurry because it's raining outside", "two cars are parked in a parking lot at night"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a man speaks followed by a loud burst"], "question": "which entity is a spray?", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "water pouring and bubbling"], "sample_ids": ["xV7Mg1QucSc", "uyRfq-jKPpo"], "start_seconds": ["14", "50"], "properties": ["alarm, ticktocks, laughs", "water, bubbles, pouring"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a woman talking as an infant is crying", "someone is typing on a computer keyboard"], 
"sample_ids": ["tMbMDvT50j8", "v0x1odnXtP0"], "start_seconds": ["12", "210"], "properties": ["a, talk, infant", "keyboard, type, computer"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "how to make money on youtube in spanish"], "captions_pred_audio": ["a baby cries and a woman speaks", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["wyllXV6PjKo", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["a baby, a woman, a man", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "a car accelerates and wind blows"], "sample_ids": ["sapQIQUhFc", "u0TrcHhkPQ"], "start_seconds": ["280", "20"], "properties": ["water, stream, trickles", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a man speaks while water drains"], "sample_ids": ["xSKJGCItUWE", "vSeGhaZt-aI"], "start_seconds": ["10", "50"], "properties": ["engine, run, boy", "water, drain, man"], "captions_pred_video": ["footage of the helicopter flying in the room", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a man is speaking and pouring liquid with background noise "], "question": "which 
entity is a man speaking while water drains?", "label": 1}, {"captions": ["an audience gives applause as a man yells and a group sings", "an infant crying as a woman laughs"], "sample_ids": ["tdWhHV3X25Q", "xhmRY9yhC7c"], "start_seconds": ["60", "20"], "properties": ["applause, audience, yells", "a, laugh, infant"], "captions_pred_video": ["a man is talking to another man on a stage in front of a microphone", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "water flows and trickles"], "sample_ids": ["w0xsN8X18Y", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["music, surface, rain", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks and is crumpling paper", "a telephone rings followed by a woman talking"], "sample_ids": ["xvDdE3zNf8Y", "tGcFnX0GHI"], "start_seconds": ["120", "0"], "properties": ["A, crumple, paper", "ring, talk, woman"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", null], "captions_pred_audio": ["a woman speaks and crumples paper", "a dial tone sounds followed by a woman speaking"], "question": "which woman is talking", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "wind blows as people chatter quietly"], "sample_ids": ["ukg5L09Wpvo", "xBxDz0CFVn0"], "start_seconds": ["150", "30"], "properties": ["a train, a horn, a bell", "wind, chatter, people"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", 
"footage is blurry and out of focus"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "an infant crying as a woman laughs"], "sample_ids": ["wIvYjuR3nrg", "xhmRY9yhC7c"], "start_seconds": ["9", "20"], "properties": ["birds, pigeons, vocalize", "a, laugh, infant"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["birds are chirping and cooing", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a propeller rotates loudly and intensely"], "sample_ids": ["w34HjHr6gAY", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["beeps, hit, woman", "loud, intense, propeller"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a person snoring several times", "a stream of water runs briefly"], "sample_ids": ["spJCm8tD9Zo", "x-PeY8Yb8M4"], "start_seconds": ["90", "300"], "properties": ["snore, person, several", "stream, water, run"], "captions_pred_video": 
["of a man laying on the ground with his mouth open", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a person is snoring loudly", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sZPuqDgX2V0", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["commentator, race, track", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "an airplane engine runs"], "sample_ids": ["uoGVs9yUqY4", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["multiple, vocalize, wind", "engine, airplane, runs"], "captions_pred_video": ["for how to make a wooden shed door youtube", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a car is driving by on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a horn honks and then loudly blares", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["wnpJndXuxLc", "ziUT9IFTkjg"], "start_seconds": ["50", "10"], "properties": ["horn, honk, loud", "background, birds, rustling"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "birds are chirping and a chime is ringing "], "question": "which is quieter", "label": 1}, {"captions": ["sirens ring and approach 
with humming of distant traffic", "a man speaks followed by another man speaking outside"], "sample_ids": ["xERFUeZONz8", "viuTg1M-dqg"], "start_seconds": ["0", "30"], "properties": ["ring, approach, traffic", "two men, speak, follow"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["an emergency vehicle siren blares", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "an airplane accelerates briefly"], "sample_ids": ["xZepNM9qcRA", "zjTG0gaGCUI"], "start_seconds": ["30", "80"], "properties": ["background, motor, run", "accelerates, airplane, briefly"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", null], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a jet engine roars as wind blows "], "question": "which is a moving object", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zFjIWfSD-4", "wDVMhEdTiVw"], "start_seconds": ["410", "30"], "properties": ["People, motor, brakes", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["an engine revs and a turning noise is made", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tOSWIURC-4", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["noise, engine, revs", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on 
a stage in front of a microphone"], "captions_pred_audio": ["a lawn mower is running ", "a man is speaking and a crowd is clapping"], "question": "which entity is a response to a performance", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "a man is snoring loudly and repeatedly"], "sample_ids": ["vmrxwuAMb2I", "sncRqQ67iJU"], "start_seconds": ["40", "460"], "properties": ["a dog, inhales, exhales", "loud, repeatedly, man"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "of an airplane flying in the dark sky at night"], "captions_pred_audio": ["a dog barks and growls", "a person is snoring"], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "a clock ticktocks"], "sample_ids": ["vbpKkWvfOu4", "v-g-j2uTByM"], "start_seconds": ["560", "30"], "properties": ["a, woman, man", "ticktocks, clock, ticktocks"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "people cheer as a vehicle engine revs"], "sample_ids": ["spJCm8tD9Zo", "xjhAnI2q6hM"], "start_seconds": ["90", "6"], "properties": ["snores, wheezes, sleeps", "engine revs, vehicle, people"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a person is snoring loudly", "a truck is revving its engine and a man is speaking "], "question": "which entity is a person", 
"label": 0}, {"captions": ["a train horn blows as it passes by", "a woman speaks with water running"], "sample_ids": ["zVacuqSb4LI", "wTideSjRFS0"], "start_seconds": ["30", "30"], "properties": ["horn, blows, train", "water, running, woman"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of a woman cooking in a kitchen with a microwave oven"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a woman is speaking while water is running in the background"], "question": "which entity is a moving object", "label": 0}, {"captions": ["birds tweet and squawk", "birds chirp as a man speaks and a younger person speaks"], "sample_ids": ["w1mlz3Pe4fU", "xl2PIWyXaM"], "start_seconds": ["300", "160"], "properties": ["squawk, tweet, scream", "chirp, man, younger person"], "captions_pred_video": ["of a bird in a cage", null], "captions_pred_audio": ["birds are chirping and singing", "birds are chirping and people are talking"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a man speaks as a motor runs in the background"], "sample_ids": ["shmR4OZtzqA", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["man, engine, idle", "background, 
motor, run"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man speaks while a motor runs", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["bees buzz and wind blows", "a stream of water flows quickly"], "sample_ids": ["tMJne1a4AFI", "wbHTKEJZyhc"], "start_seconds": ["0", "20"], "properties": ["bees buzz, wind blows, bees", "stream, water, flow"], "captions_pred_video": ["a swarm of bees on the ground", "footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and 
a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and"], "captions_pred_audio": ["a swarm of bees buzzing around", "a waterfall is flowing and people are speaking "], "question": "which entity is moving faster", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "some tunes played by whistling"], "sample_ids": ["vddP56-ogds", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["water, flow, laugh", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a toilet door squeaks as it is opened", "speaking following by laughing and clapping"], "sample_ids": ["sdXV-ylviw", "u2f5NpsoHBg"], "start_seconds": ["190", "30"], "properties": ["door, toilet, squeaks", "person, laugh, clap"], "captions_pred_video": [null, "is being projected on a screen at the front of the stage"], "captions_pred_audio": ["a dog barks and taps with background noise ", "a woman is speaking and a crowd is clapping"], "question": "which entity is a person", "label": 1}, {"captions": ["some tunes played by whistling", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["u6BnG6YZqJ4", "vbZ-0lGPneg"], "start_seconds": ["0", "30"], "properties": ["tune, play, whistling", "a woman, a television program, a bird"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "of a man holding a baby duck in his 
hands"], "captions_pred_audio": ["a person whistling a song", "a woman is speaking and a dog is whimpering"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man speaks as he moves silverware in a bowl", "some men converse over an engine running"], "sample_ids": ["x6ijhqRY38s", "sCiy7QS1U"], "start_seconds": ["250", "300"], "properties": ["bowl, silverware, man", "men, converse, engine"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", null], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "people speak as gunfire rings out"], "sample_ids": ["xKB8O8LTs6s", "wqTCwqVRDlk"], "start_seconds": ["70", "80"], "properties": ["music, gunfire, explosion", "gunfire, ring, speak"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a man is speaking and a gun is fired"], "question": "which entity has more gunfire", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "a clock ticktocks"], "sample_ids": ["tK4VlLsNxak", "v-g-j2uTByM"], "start_seconds": ["120", "30"], "properties": ["a, dial, telephone", "ticktocks, clock, ticktocks"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "several beeps are followed by a hit and a woman talking"], "sample_ids": 
["vzxHnu-SFEw", "w34HjHr6gAY"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "beeps, hit, woman"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a beep sounds followed by a child speaking"], "question": "which woman is talking", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["tOj4tdLRaA", "wDVMhEdTiVw"], "start_seconds": ["70", "30"], "properties": ["woman, laugh, baby", "gun, shoot, water"], "captions_pred_video": 
[null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause harm", "label": 1}, {"captions": ["wind blows and a stream of water flows nearby", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["sYITalLZjj4", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["stream, flow, wind", "loud, jet engine, roar"], "captions_pred_video": ["two ducks are swimming in the water near each other", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["wind blows and birds chirp", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["tQWGZLItBXk", "vbZ-0lGPneg"], "start_seconds": ["170", "30"], "properties": ["music, person, ding", "a woman, a television program, a bird"], "captions_pred_video": ["worms revolution screenshots", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["birds chirp and objects are moved around", "water flows as men speak and yell"], "sample_ids": ["yPUYU6t3rwo", "vJ7JPEFhyLA"], "start_seconds": ["370", "16"], "properties": ["birds chirp, objects are moved around, birds", "water, flow, men"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["insects buzz and a man speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more 
active", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["su6FAOcOA8c", "y8WEcpOlT3I"], "start_seconds": ["4", "40"], "properties": ["engine, run, woman", "harsh, wind, blows"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking with wind noise in the background "], "question": "which entity is a man speaking to another man?", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["zofjfKhqLk8", "w34HjHr6gAY"], "start_seconds": ["10", "30"], "properties": ["background, metal, clings", "beeps, hit, woman"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "winds blows roughly as a vehicle races past"], "sample_ids": ["yajyRTUQk3U", "xjvTpk2Zpr8"], "start_seconds": ["400", "70"], "properties": ["noise, woman, speak", "wind, blows, vehicle"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage of a dhl 
plane landing on the runway"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "an airplane engine runs"], "sample_ids": ["wqN6IIHw3po", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["rain, surface, fall", "engine, airplane, runs"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and water is splashing", "a car is driving by on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a male speaks over some small clicks", "children speak and play together"], "sample_ids": ["uXxVebHsGZ8", "yVVP8XvWJTo"], "start_seconds": ["30", "260"], "properties": ["male, clicks, speak", "children, speak, play"], "captions_pred_video": [null, "footage of a playground at a school or daycare center"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "children are speaking and breathing with background noise "], "question": "which entity is more social", "label": 1}, {"captions": ["birds chirp and objects are moved around", "rain falls onto a hard surface and thunder roars before music plays"], "sample_ids": ["yPUYU6t3rwo", "xNMovAf3o50"], "start_seconds": ["370", "0"], "properties": ["birds chirp, objects are moved around, birds", "rain, thunder, music"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "tieng mua - the falling rain lynk lee"], "captions_pred_audio": ["insects buzz and a man speaks", "thunder and rain with music playing in the background "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["someone whistles a tune", "wind blows as people chatter 
quietly"], "sample_ids": ["sIXTftIuUgw", "xBxDz0CFVn0"], "start_seconds": ["90", "30"], "properties": ["someone, tune, whistle", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a person whistling a song", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["small dogs yip and bark sharply", "birds chirp and a dog breathes heavily"], "sample_ids": ["v-wcQf4BDY0", "y2ZBGpgbhHM"], "start_seconds": ["120", "30"], "properties": ["bark, yip, sharply", "dog, chirp, breathe"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", null], "captions_pred_audio": ["a dog barks and growls", "birds chirping and a dog panting"], "question": "which entity is a dog", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "a man speaks as birds chirp and a vehicle passes nearby"], "sample_ids": ["zsLxS-uLJTw", "siJFXfGWgDk"], "start_seconds": ["20", "50"], "properties": ["horn, blast, train", "a, bird, vehicle"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a man is speaking and birds are chirping in the background "], "question": "which entity is a vehicle passing nearby?", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "a car speeding up in the distance"], "sample_ids": ["uC9dtII1KDI", "u0TrcHhkPQ"], "start_seconds": ["150", "20"], "properties": ["wind, gusts, distance", "distance, car, speed"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", null], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, 
{"captions": ["a horse runs while two women talk", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sdvI1mHAsc", "vJ7JPEFhyLA"], "start_seconds": ["20", "16"], "properties": ["two women, horse, run", "three men, wind, flow"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 0}, {"captions": ["an adult woman speaks over chopping and silverware noises", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yYJksgsxx5U", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["audio, woman, silverware", "female, spraying, scream"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a video?", "label": 1}, {"captions": ["motors runs briefly and tires screech", "a man speaks followed by another man speaking outside"], "sample_ids": ["yRx9txMcBl0", "viuTg1M-dqg"], "start_seconds": ["40", "30"], "properties": ["motors, tires, screech", "two men, speak, follow"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage 
of water coming out of a hole in the ground"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker?", "label": 1}, {"captions": ["an engine runs and wind blows", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vs65y4qmyBE", "xfaoyyzw2WU"], "start_seconds": ["340", "180"], "properties": ["engine, run, wind", "loud, jet engine, roar"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "an airplane engine runs"], "sample_ids": ["u--KhUW8l1Y", "yVPZ2MNWpms"], "start_seconds": ["0", "0"], "properties": ["horn, siren, life", "engine, airplane, runs"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a car is driving by on the road "], "question": "which entity is a machine", "label": 1}, {"captions": ["a door slams shut roughly", "birds chirp and wind blows"], "sample_ids": ["zkKdxzNC97Y", "sxIvBMSavMQ"], "start_seconds": ["27", "210"], "properties": ["a door, slams, shut", "birds, chirp, wind"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how 
to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a"], "captions_pred_audio": ["a door is opened and closed", "birds are chirping and insects are buzzing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks as a motor runs in the background", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["xZepNM9qcRA", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["background, motor, run", "clickety-clack, train, whistle"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man speaks while a motorcycle revs and accelerates ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yks4cLgIDMc", "sSMl2vc3ek"], "start_seconds": ["170", "20"], "properties": ["background, speaking, child", "loud, multiple, distance"], "captions_pred_video": ["footage of two kids wrestling on the floor", null], "captions_pred_audio": ["a man is speaking and a child is crying", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "some men converse over an engine running"], "sample_ids": ["tqR406bGiE", "sCiy7QS1U"], "start_seconds": ["40", "300"], "properties": ["flush, water, gurgle", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is 
flushed", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a toilet?", "label": 0}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a car accelerates and wind blows"], "sample_ids": ["y8dSeubCNI", "u0TrcHhkPQ"], "start_seconds": ["4", "20"], "properties": ["men, women, car", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine revving and people talking in the background", "a race car accelerates and revs its engine "], "question": "which car is moving faster", "label": 1}, {"captions": ["a vehicle engine runs while a siren and horn sound", "a vehicle is skidding and squealing tires"], "sample_ids": ["u--KhUW8l1Y", "soTOh3zYJfY"], "start_seconds": ["0", "40"], "properties": ["engine, sound, horn", "vehicle, skid, tires"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a red car drifting on a winding road with smoke coming out of it"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a race car accelerates and revs its engine "], "question": "which vehicle is skidding", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "an infant crying frantically"], "sample_ids": ["sofxkNWaP0s", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["wind, engine, louder", "cry, infant, frantically"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "several insects fly while two men talk"], "sample_ids": ["uC9dtII1KDI", "s-T9OVOiMLo"], "start_seconds": ["150", "330"], "properties": ["wind, gusts, 
distance", "several, fly, men"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a person uses a saw to cut some wood"], "sample_ids": ["vhJWZheqaE", "sHbXC6na9hg"], "start_seconds": ["0", "0"], "properties": ["water drains unevenly, toilet flushes, water drains", "a person, saw, wood"], "captions_pred_video": [null, "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["a toilet is flushed", "an engine is idling and vibrating"], "question": "which entity is a person?", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "bees buzz as wind blows"], "sample_ids": ["xV7Mg1QucSc", "tMJne1a4AFI"], "start_seconds": ["14", "0"], "properties": ["alarm, ticktocks, laughs", "bees, buzz, wind"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "a swarm of bees on the ground"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a swarm of bees buzzing around"], "question": "which entity is not a clock?", "label": 1}, {"captions": ["a man speaks uses a drill", "a person snores loudly multiple times at a close distance"], "sample_ids": ["x5eIC7S0fbg", "sSMl2vc3ek"], "start_seconds": ["60", "20"], "properties": ["A man is speaking, uses a drill, and is a tool", "loud, multiple, distance"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", null], "captions_pred_audio": ["a man is speaking and using a power tool ", "a person snoring loudly"], "question": "which entity is a tool", "label": 0}, {"captions": ["a man speaks as a vehicles passes by then a 
woman speaks", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["siJFXfGWgDk", "uYT5gxnyMWM"], "start_seconds": ["50", "50"], "properties": ["man, woman, vehicle", "a, scream, girl"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "water is sprayed across a hard surface"], "sample_ids": ["y8WEcpOlT3I", "sQwlkXjQabo"], "start_seconds": ["40", "10"], "properties": ["wind, speak, buffeting", "water, spray, surface"], "captions_pred_video": ["on how to use a sewing machine youtube", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a person is snoring while sleeping", "some tunes played by whistling"], "sample_ids": ["vJrjSeP17yE", "u6BnG6YZqJ4"], "start_seconds": ["40", "0"], "properties": ["a person is sleeping, snoring, person", "tune, play, whistling"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a person snoring loudly", "a person whistling a song"], "question": "which entity is playing tunes", "label": 1}, {"captions": ["a man woman speak while crickets sing", "multiple people speak and children yell while water gurgles"], "sample_ids": ["zTLVJCo4WEE", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["a, crickets, sing", "multiple, people, yell"], "captions_pred_video": ["- a boy with a rifle aiming at a target", null], "captions_pred_audio": ["a woman speaks and 
crickets chirp", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a woman speaks and other women and a man talk with her"], "sample_ids": ["vbZ-0lGPneg", "vbpKkWvfOu4"], "start_seconds": ["30", "560"], "properties": ["a woman, a television program, a bird", "a, woman, man"], "captions_pred_video": ["of a man holding a baby duck in his hands", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["distant humming of an engine", "pigeons vocalize and birds chirp"], "sample_ids": ["yVPZ2MNWpms", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["sound, distance, engine", "vocalize, bird, chirp"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "of the pigeon in the cage"], "captions_pred_audio": ["a car is driving by on the road ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["an airplane accelerates briefly", "water is sprayed across a hard surface"], "sample_ids": ["zjTG0gaGCUI", "sQwlkXjQabo"], "start_seconds": ["80", "10"], "properties": ["accelerates, airplane, briefly", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a jet engine roars as wind blows ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "a vehicle accelerates 
squealing tires"], "sample_ids": ["ukxt9I7eMMg", "sd7xVssqlw"], "start_seconds": ["30", "50"], "properties": ["food, pan, cook", "accelerates, tires, squealing"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a person is snoring while sleeping", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vJrjSeP17yE", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["a person is sleeping, snoring, person", "People, motor, brakes"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a person?", "label": 0}, {"captions": ["a woman speaks and is crumpling paper", "paper is crumpling consistently"], "sample_ids": ["xvDdE3zNf8Y", "v5cSxLaHADY"], "start_seconds": ["120", "0"], "properties": ["A, crumple, paper", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman speaks and crumples paper", "paper is crumpled and crinkled"], "question": "which entity is crumpling paper", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["tDVADusiIoc", "uEU-Hg5MTN8"], "start_seconds": ["60", "27"], "properties": ["water, radio, man", "animal, grunts, snorts"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking 
while the wind is blowing and water is splashing", "a woman is speaking and a baby is crying"], "question": "which entity is about a woman speaking and an animal grunting and snorting?", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "people speak as gunfire rings out"], "sample_ids": ["s4Uz1Ffgo04", "wqTCwqVRDlk"], "start_seconds": ["100", "80"], "properties": ["roars, background, people speaking", "gunfire, ring, speak"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "a man speaks as a motor runs in the background"], "sample_ids": ["vf44CgrjT0A", "xZepNM9qcRA"], "start_seconds": ["20", "30"], "properties": ["loud, long, person", "background, motor, run"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a loud burp", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "a woman speaks happily and an animal chirps"], "sample_ids": ["sapQIQUhFc", "uWAAAL4CIoc"], "start_seconds": ["280", "0"], "properties": ["water, stream, trickles", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a woman is speaking and a dog is barking "], "question": "which entity is more active", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "a woman talks while something is fried and objects are 
tapped"], "sample_ids": ["xzKKf9bKNUo", "yajyRTUQk3U"], "start_seconds": ["10", "400"], "properties": ["background, noise, snoring", "a woman, something, fried"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a child yells and another yells", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vMDHu7Lxcgw", "y8WEcpOlT3I"], "start_seconds": ["410", "40"], "properties": ["two, yell, child", "harsh, wind, blows"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a man is speaking with wind noise in the background "], "question": "which entity has two people speaking?", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["w34HjHr6gAY", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["beeps, hit, woman", "a woman, a television program, a bird"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a woman is 
speaking and a dog is whimpering"], "question": "which entity has a woman talking?", "label": 0}, {"captions": ["food is frying then a woman speaks", "three men talk while wind blows and some liquid flows"], "sample_ids": ["ukxt9I7eMMg", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["food, woman, speak", "three men, wind, flow"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a frog vocalizes while birds chirp", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["vMf1dLD6Sng", "wqZ135Ssz0"], "start_seconds": ["6", "60"], "properties": ["frog, bird, vocalize", "two men, woman, birds"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", null], "captions_pred_audio": ["a frog croaks loudly", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity is a human activity", "label": 1}, {"captions": ["food is frying then a woman speaks", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ukxt9I7eMMg", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["food, woman, speak", "female, spraying, scream"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["water flows as men speak and yell", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": 
["vJ7JPEFhyLA", "vlS6YMeWAPo"], "start_seconds": ["16", "40"], "properties": ["water, flow, men", "sheep, baa, birds"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "people speak as gunfire rings out"], "sample_ids": ["ylpYOorfH4o", "wqTCwqVRDlk"], "start_seconds": ["410", "80"], "properties": ["engine, running, wind", "gunfire, ring, speak"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a war zone", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a man speaks followed by another man speaking outside"], "sample_ids": ["sK4u5T8hW78", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["a, car, pass", "two men, speak, follow"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["uYT5gxnyMWM", "zl9Dqx-j7q4"], "start_seconds": ["50", "6"], "properties": ["a, scream, girl", "engine, laugh, loud"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a jet engine roars "], "question": "which entity is followed by a scream", "label": 0}, {"captions": ["food is frying then a woman speaks", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["ukxt9I7eMMg", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["food, woman, speak", "animal, grunts, snorts"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is about a woman speaking?", "label": 0}, {"captions": ["a train horn blows as it passes by", "people applaud and hoot and chat quietly"], "sample_ids": ["zVacuqSb4LI", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["horn, blows, train", "people, applaud, hoot"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by 
mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", null], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sQwlkXjQabo", "wDVMhEdTiVw"], "start_seconds": ["10", "30"], "properties": ["liquid, surface, spray", "gun, shoot, water"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["spraying followed by silence", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a diesel truck engine runs while wind blows", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["xyL9F5VrjkE", "ukg5L09Wpvo"], "start_seconds": ["20", "150"], "properties": ["engine, run, wind", "clickety-clack, train, whistle"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["the wind is blowing and a car is 
passing by ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "people speak in the background as a clock ticktocks"], "sample_ids": ["vz8868znkVQ", "vZAw4apG0Es"], "start_seconds": ["60", "30"], "properties": ["audio, click, kid speaking", "background, clock, ticktocks"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a baby is laughing and breathing with background noise ", "a clock is ticking and people are talking"], "question": "which entity has a clock ticking in the background?", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "an insect buzzes around continuously"], "sample_ids": ["uRExseg-0XI", "v25l1jef3JY"], "start_seconds": ["210", "0"], "properties": ["woman, man, water", "buzzes, continuously, insect"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["u7C-AEBQM", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["ticks, rhythmic, quiet", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a ticktock of a clock", "a truck is honking its horn and a siren is blaring "], "question": "which entity is louder", "label": 1}, {"captions": ["an engine runs and wind blows", "dishes cling 
together then a man begins to speak"], "sample_ids": ["vs65y4qmyBE", "sQGXqGcwOTc"], "start_seconds": ["340", "3"], "properties": ["engine, run, wind", "cling, speak, dishes"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a vehicle engine runs as a siren and horn sound", "wind blows as people chatter quietly"], "sample_ids": ["u--KhUW8l1Y", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["sound, vehicle, horn", "wind, chatter, people"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage is blurry and out of focus"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a horse runs while two women talk", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sdvI1mHAsc", "zFjIWfSD-4"], "start_seconds": ["20", "410"], "properties": ["two women, horse, run", "People, motor, brakes"], "captions_pred_video": [null, null], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is more likely to be in a stable?", "label": 0}, {"captions": ["a man speaks while water trickles and flows", "an infant crying as a woman laughs"], "sample_ids": ["sapQIQUhFc", "xhmRY9yhC7c"], "start_seconds": ["280", "20"], "properties": ["water, trickles, flow", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a baby cries and a woman speaks"], "question": 
"which entity is a person", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "the clinking of a train bell with the humming of an engine and a train horn blowing"], "sample_ids": ["wSVhSdj0F0", "zgUgkpk78xU"], "start_seconds": ["10", "70"], "properties": ["horn honks, keys jingle, slam", "clinking, humming, horn"], "captions_pred_video": [null, "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a train blows its horn as it speeds down the tracks "], "question": "which entity is a train?", "label": 1}, {"captions": ["birds vocalize and chirp continuously", "a man speaking with light rustling"], "sample_ids": ["w1mlz3Pe4fU", "zOZleIRqZm4"], "start_seconds": ["300", "80"], "properties": ["vocalize, chirp, continuously", "light, rustling, man"], "captions_pred_video": ["of a bird in a cage", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["birds are chirping and singing", "a man is speaking with crickets chirping in the background"], "question": "which entity is speaking", "label": 1}, {"captions": ["someone snores nearby", "paper folding and crinkling"], "sample_ids": ["spJCm8tD9Zo", "zPpG3RD8lSs"], "start_seconds": ["90", "20"], "properties": ["someone snores, nearby, someone", "paper, fold, crinkle"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of 
paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a person is snoring loudly", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vMf1dLD6Sng", "tiDFTC-5vU"], "start_seconds": ["6", "30"], "properties": ["frog, bird, vocalize", "male, duck, laugh"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", null], "captions_pred_audio": ["a frog croaks loudly", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["animals bleat and moo as a person speaks", "water flows as men speak and yell"], "sample_ids": ["tPJvjq9QePY", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["animal, bleat, moo", "water, flow, men"], "captions_pred_video": ["a dog and a sheep in a barn", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby cries and a man speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a toilet flushes and water sputters as it drains", "water splashes as an animal walks through"], "sample_ids": ["smGI3C1NZc", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["water, drain, toilet", "animal, water, 
splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a toilet is flushed", "water splashes and gurgles as people speak"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "people speak as gunfire rings out"], "sample_ids": ["wz7N8YRy74I", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["rooster, crow, background, people", "gunfire, ring, speak"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["children speak and play together", "people speak as gunfire rings out"], "sample_ids": ["yVVP8XvWJTo", "wqTCwqVRDlk"], "start_seconds": ["260", "80"], "properties": ["children, speak, play", "gunfire, ring, speak"], "captions_pred_video": ["footage of a playground at a school or daycare center", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a clock ticktocks briefly", "a woman and man speak while food is frying"], "sample_ids": ["u7C-AEBQM", "zk-xJGQU8-4"], "start_seconds": ["30", "130"], "properties": ["ticktocks, clock, ticktocks briefly", "food, man, woman"], "captions_pred_video": [null, "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["a ticktock of a clock", "a woman is speaking while dishes are clanging and music is playing in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a man 
speaks over a running engine and blowing wind", "some tunes played by whistling"], "sample_ids": ["ylpYOorfH4o", "u6BnG6YZqJ4"], "start_seconds": ["410", "0"], "properties": ["engine, running, wind", "tune, play, whistling"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a person whistling a song"], "question": "which entity is playing tunes", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["uiS58TNyUiw", "tdWhHV3X25Q"], "start_seconds": ["430", "60"], "properties": ["vocalize, bird, chirp", "applause, audience, yells"], "captions_pred_video": ["of the pigeon in the cage", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a man is speaking and a crowd is clapping"], "question": "which entity is a response to a performance", "label": 1}, {"captions": ["a woman talking as an infant is crying", "dishes cling together then a man begins to speak"], "sample_ids": ["tMbMDvT50j8", "sQGXqGcwOTc"], "start_seconds": ["12", "3"], "properties": ["a, talk, infant", "cling, speak, dishes"], "captions_pred_video": ["shows a little girl covering her face with her hands while 
sitting at a table", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a baby cries and a woman speaks", "mechanisms are operating and water is splashing "], "question": "which entity is about a woman talking to an infant?", "label": 0}, {"captions": ["several ducks are quacking and squawking", "an engine idles quietly then gradually becomes louder"], "sample_ids": ["wfHeoPDLMaM", "vbr9mHKc8WM"], "start_seconds": ["30", "40"], "properties": ["quacking, squawking, ducks", "noise, loudness, engine"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", null], "captions_pred_audio": ["ducks are quacking", "an engine is idling"], "question": "which entity is quieter", "label": 1}, {"captions": ["a series of light horn beeps is followed by a loud steam whistle", "people cheer as a vehicle engine revs"], "sample_ids": ["wnpJndXuxLc", "xjhAnI2q6hM"], "start_seconds": ["50", "6"], "properties": ["beeps, loud, whistle", "engine revs, vehicle, people"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a steam whistle 
blows and a train moves with wind noise in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["w5W5Kqtc8E", "uYT5gxnyMWM"], "start_seconds": ["100", "50"], "properties": ["water, splashes, motorboat", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman is speaking and a baby is crying"], "question": "which entity is about a girl speaking followed by a scream?", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["sZPuqDgX2V0", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["engine, accelerate, intercom", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "a woman speaks as she rubs two objects together"], "sample_ids": ["w8uLijTqtlU", "vzxHnu-SFEw"], "start_seconds": ["70", "80"], "properties": ["wind, microphone, noise", "two objects, woman, speak"], "captions_pred_video": ["footage is blurry and shaky", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make 
a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["the wind is blowing strongly", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "a car speeding up in the distance"], "sample_ids": ["y2bVZ7rz-5M", "u0TrcHhkPQ"], "start_seconds": ["280", "20"], "properties": ["engine, horn, siren", "distance, car, speed"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", null], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "wind blowing followed by a zoom"], "sample_ids": ["tDVADusiIoc", "vr8ZXjEBhMQ"], "start_seconds": ["60", "150"], "properties": ["water, radio, man", "wind, blow, zoom"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more calm", "label": 1}, {"captions": ["engines sputter roughly and tires squeal", "wind blowing followed by a zoom"], "sample_ids": ["zhx6hoYrHeI", "vr8ZXjEBhMQ"], 
"start_seconds": ["160", "150"], "properties": ["engine, sputter, rough", "wind, blow, zoom"], "captions_pred_video": ["footage of a man working on a motorcycle's tire", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a car accelerates and revs its engine ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a zoom?", "label": 0}, {"captions": ["an insect buzzes around continuously", "a duck quacks several times"], "sample_ids": ["v25l1jef3JY", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["buzzes, continuously, insect", "quacks, duck, several"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "wind blowing followed by a zoom"], "sample_ids": ["tqR406bGiE", "vr8ZXjEBhMQ"], "start_seconds": ["40", "150"], "properties": ["flush, water, gurgle", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a toilet is flushed", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sxYkFKFIZD0", "tiDFTC-5vU"], "start_seconds": ["20", "30"], "properties": ["screech, man, door", "male, duck, laugh"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 
2005 audi a4 1 8t 2005 audi a4 1 8t 2", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a man is speaking and ducks are quacking"], "question": "which entity has a door open?", "label": 0}, {"captions": ["people converse in the distance as a clock ticks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vZAw4apG0Es", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["people, clock, converse", "men, talk, cars"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an engine runs and a man speaks", "a small engine idles continuously"], "sample_ids": ["yT5WfYMRr-U", "y5WII6cTH7k"], "start_seconds": ["30", "40"], "properties": ["engine, run, man", "engine, idle, continuously"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", "footage of a sewing machine stitching a red and white hat"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "an engine is knocking and vibrating "], "question": "which engine is running", "label": 0}, {"captions": ["someone whistles a tune", "loud intermittent buzzing with 
intermittent laughter"], "sample_ids": ["sIXTftIuUgw", "sLUnaPT5gM8"], "start_seconds": ["90", "0"], "properties": ["someone, tune, whistle", "loud, laughter, intermittent"], "captions_pred_video": [null, "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a person whistling a song", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a train horn sounds as a railroad passing bell rings", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["zgUgkpk78xU", "zj2R0XoFr5k"], "start_seconds": ["70", "50"], "properties": ["horn, bell, train", "airplane, boy, fly"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "a stream of water runs briefly"], "sample_ids": ["viuTg1M-dqg", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["two men, speak, follow", "stream, water, run"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a stream of water runs 
briefly"], "sample_ids": ["v7jJS8aAyA", "x-PeY8Yb8M4"], "start_seconds": ["10", "300"], "properties": ["wind, blows, loudly", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "a stream of water runs briefly"], "sample_ids": ["vuUVPzd2FXw", "x-PeY8Yb8M4"], "start_seconds": ["160", "300"], "properties": ["a, steam, release", "stream, water, run"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "some men converse over an engine running"], "sample_ids": ["zgUgkpk78xU", "sCiy7QS1U"], "start_seconds": ["70", "300"], "properties": ["horn, bells, ring", "men, converse, engine"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a car accelerates and wind blows"], "sample_ids": ["x5cuQjOdM3E", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["cat, meows, young woman", "accelerates, wind, blows"], "captions_pred_video": ["a black 
background with an airplane flying in the sky", null], "captions_pred_audio": ["a cat meows and a woman speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "wind blows as people chatter quietly"], "sample_ids": ["yajyRTUQk3U", "xBxDz0CFVn0"], "start_seconds": ["400", "30"], "properties": ["noise, woman, speak", "wind, chatter, people"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["food is frying while a woman speaks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["yhQ2Lg-7qDY", "w5W5Kqtc8E"], "start_seconds": ["130", "100"], "properties": ["food, woman, speak", "wind, blow, vehicle"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "water pouring and bubbling"], "sample_ids": ["s4Uz1Ffgo04", "uyRfq-jKPpo"], "start_seconds": ["100", "50"], "properties": ["roars, background, people speaking", "water, bubbles, pouring"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff 
hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "an infant crying as a woman laughs"], "sample_ids": ["sZvwOuuPGP0", "xhmRY9yhC7c"], "start_seconds": ["50", "20"], "properties": ["engine, diesel, truck", "a, laugh, infant"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a medium engine is running ", "a baby cries and a woman speaks"], "question": "which is not a person", "label": 0}, {"captions": ["an engine idles quietly then gradually becomes louder", "vehicles pass by on a roadway"], "sample_ids": ["vbr9mHKc8WM", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["noise, loudness, engine", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["an engine is idling", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["someone snores nearby", "a car accelerates and wind blows"], "sample_ids": ["spJCm8tD9Zo", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["someone snores, nearby, someone", "accelerates, wind, blows"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a race car accelerates and revs its engine "], "question": 
"which entity is moving", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a clock ticktocks"], "sample_ids": ["wz7N8YRy74I", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, people", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["yVumC9TGknc", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["humming, clock, birds", "beeps, hit, woman"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a series of beeps and chirps", "a beep sounds followed by a child speaking"], "question": "which entity has a clock ticking?", "label": 0}, {"captions": ["a sleeping person snores and wheezes", "someone snores nearby"], "sample_ids": ["spJCm8tD9Zo", "spJCm8tD9Zo"], "start_seconds": ["90", "90"], "properties": ["snores, wheezes, sleeps", "someone snores, nearby, someone"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a man 
laying on the ground with his mouth open"], "captions_pred_audio": ["a person is snoring loudly", "a person is snoring loudly"], "question": "which entity is a person?", "label": 0}, {"captions": ["a woman sneezes then speaks", "paper folding and crinkling"], "sample_ids": ["x4dZyf9Gbj0", "zPpG3RD8lSs"], "start_seconds": ["130", "20"], "properties": ["sneezes, speaks, woman", "paper, fold, crinkle"], "captions_pred_video": ["footage is blurry and out of focus", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a woman sneezes and speaks", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of folding and crinkling?", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "an infant crying as a woman laughs"], "sample_ids": ["ukxt9I7eMMg", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["food, pan, cook", "a, laugh, infant"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a baby cries and a woman speaks"], "question": "which entity is a 
person", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["t97k0cejSQE", "ukg5L09Wpvo"], "start_seconds": ["250", "150"], "properties": ["bird, chirp, insect", "a train, a horn, a bell"], "captions_pred_video": ["a bee on a purple thistle flower", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a train blows its whistle and blows its horn "], "question": "which entity is a train", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "birds tweet and squawk"], "sample_ids": ["uEU-Hg5MTN8", "w1mlz3Pe4fU"], "start_seconds": ["27", "300"], "properties": ["a woman, laughs, animal", "squawk, tweet, scream"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "of a bird in a cage"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "birds are chirping and singing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["vs65y4qmyBE", "sQGXqGcwOTc"], "start_seconds": ["340", "3"], "properties": ["engine, run, man", "cling, speak, dishes"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "mechanisms are operating and water is splashing "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "water pouring and bubbling"], "sample_ids": ["xKB8O8LTs6s", "uyRfq-jKPpo"], "start_seconds": ["70", "50"], "properties": ["music, radio, gunshots", "water, bubbles, pouring"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "on youtube how 
to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "wind blows as people chatter quietly"], "sample_ids": ["v0x1odnXtP0", "xBxDz0CFVn0"], "start_seconds": ["210", "30"], "properties": ["keyboard, type, computer", "wind, chatter, people"], "captions_pred_video": ["how to make money on youtube in spanish", "footage is blurry and out of focus"], "captions_pred_audio": ["a person is typing on a keyboard", "a man is speaking with wind noise in the background "], "question": "which is quieter", "label": 0}, {"captions": ["white noise and snoring with some rustling in the background", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["xzKKf9bKNUo", "y8WEcpOlT3I"], "start_seconds": ["10", "40"], "properties": ["background, noise, snoring", "harsh, wind, blows"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking with wind noise in the background "], "question": "which entity is a man speaking?", "label": 1}, 
{"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a car speeding up in the distance"], "sample_ids": ["sU53zg9Jp7s", "u0TrcHhkPQ"], "start_seconds": ["380", "20"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "distance, car, speed"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", null], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "wind blows as people chatter quietly"], "sample_ids": ["sQGXqGcwOTc", "xBxDz0CFVn0"], "start_seconds": ["3", "30"], "properties": ["audio, kid, giggles", "wind, chatter, people"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage is blurry and out of focus"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a machine runs continuously", "a car speeding up in the distance"], "sample_ids": ["wdXV3Pv0jiY", "u0TrcHhkPQ"], "start_seconds": ["11", "20"], "properties": ["machine, running, continuously", "distance, car, speed"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["mechanisms are operating and a bell is ringing ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["some clanking with distant murmuring", "some men converse over an engine running"], "sample_ids": ["uMTTDZ2mb4", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["clanking, murmuring, distant", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["people are 
talking and a car is driving by with wind noise in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "wind blows as people chatter quietly"], "sample_ids": ["uqFtmnhuqA8", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["a, b, c", "wind, chatter, people"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "footage is blurry and out of focus"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks uses a drill", "water is sprayed across a hard surface"], "sample_ids": ["x5eIC7S0fbg", "sQwlkXjQabo"], "start_seconds": ["60", "10"], "properties": ["A man is speaking, uses a drill, and is a tool", "water, spray, surface"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking and using a power tool ", "spraying followed by silence"], "question": "which entity is a tool", "label": 0}, {"captions": ["several ducks are quacking and squawking", "several insects fly while two men talk"], "sample_ids": ["wfHeoPDLMaM", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["quacking, squawking, ducks", "several, fly, men"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the 
middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["ducks are quacking", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a group of animals?", "label": 0}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "an airplane engine spools and people speak"], "sample_ids": ["vqZuVbG6-HI", "wTjoRj1se3U"], "start_seconds": ["130", "390"], "properties": ["background, male, female", "airplane, engine, spool"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a jet engine is running and people are talking"], "question": "which entity is about an airplane?", "label": 1}, {"captions": ["a clock ticktocks", "paper is crumpling consistently"], "sample_ids": ["v-g-j2uTByM", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["ticktocks, clock, ticktocks", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a clock is ticking loudly", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a small airplane approaches and then flies by, after and 
during which a boy speaks"], "sample_ids": ["wEBlkGWVWwE", "zj2R0XoFr5k"], "start_seconds": ["260", "50"], "properties": ["a, babble, woman", "airplane, boy, fly"], "captions_pred_video": ["shows a person writing on the whiteboard", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "people applaud and hoot and chat quietly"], "sample_ids": ["wqN6IIHw3po", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["rain, surface, fall", "people, applaud, hoot"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", null], "captions_pred_audio": ["a man is speaking and water is splashing", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be in a theater", "label": 1}, {"captions": ["a female speaks softly as paper crinkles", "water pouring and bubbling"], "sample_ids": ["xvDdE3zNf8Y", "uyRfq-jKPpo"], "start_seconds": ["120", "50"], "properties": ["a, female, speaks", "water, bubbles, pouring"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube 
how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman speaks and crumples paper", "water is running from a faucet"], "question": "which entity is more likely to be a video of a person speaking?", "label": 0}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "a horn honks and then loudly blares"], "sample_ids": ["wsHBIgzs9Fs", "wnpJndXuxLc"], "start_seconds": ["50", "50"], "properties": ["horn, continuous, buzzing", "horn, honk, loud"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["an electronic device bleeps once", "crowd applause while a guy laughs followed by another man speaking"], "sample_ids": ["tHJ6JSa8Y4", "tDlfY3nmx1A"], "start_seconds": ["0", "160"], "properties": ["bleeps, electronic, device", "applause, laugh, man"], "captions_pred_video": [null, "a man in a suit and tie is talking to another man in a suit and tie"], "captions_pred_audio": ["a clock is ticking and beeping", "a crowd is clapping and laughing and a man is speaking "], "question": "which entity is a man speaking?", "label": 1}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "vehicle engines race around a track as a man commentates"], "sample_ids": ["sapQIQUhFc", "sZPuqDgX2V0"], "start_seconds": ["280", "30"], "properties": ["liquid, flow, distance", "commentator, race, track"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", 
"a man is speaking and a helicopter is flying overhead "], "question": "which entity is about a race", "label": 1}, {"captions": ["a man speaks while rain falls onto a hard surface", "a man sprays as a scraping occurs in the background"], "sample_ids": ["wqN6IIHw3po", "sOa7g-44Dag"], "start_seconds": ["30", "30"], "properties": ["rain, surface, fall", "background, man, spray"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", "footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic"], "captions_pred_audio": ["a man is speaking and water is splashing", "a man is speaking and rubbing his hands together "], "question": "which entity is a man speaking?", "label": 0}, {"captions": ["a car accelerates and wind blows", "a woman speaks as she rubs two objects together"], "sample_ids": ["u0TrcHhkPQ", "vzxHnu-SFEw"], "start_seconds": ["20", "80"], "properties": ["accelerates, wind, blows", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a race car accelerates and revs 
its engine ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a process", "label": 1}, {"captions": ["a person speaks over rustling leaves", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zOZleIRqZm4", "tdWhHV3X25Q"], "start_seconds": ["80", "60"], "properties": ["rustling, leaves, person", "applause, audience, yells"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["an aircraft engine runs", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yLCORCnd35Q", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], "properties": ["engine, aircraft, runs", "three men, wind, flow"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a moving object", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "paper is crumpling consistently"], "sample_ids": ["vlS6YMeWAPo", "v5cSxLaHADY"], "start_seconds": ["40", "0"], "properties": ["sheep, baa, birds", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a goat bleats and birds chirp", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["multiple birds vocalize and wind blows", "a duck 
quacks continuously"], "sample_ids": ["uoGVs9yUqY4", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["multiple, vocalize, wind", "quacks, continuously, duck"], "captions_pred_video": ["for how to make a wooden shed door youtube", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["birds are chirping and flapping their wings with wind noise in the background ", "a duck is quacking loudly"], "question": "which entity is a single animal", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "a propeller rotates loudly and intensely"], "sample_ids": ["rwTERCUno", "ugHJF0hfYkg"], "start_seconds": ["90", "10"], "properties": ["engine, idle, sputter", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["an engine is idling and vibrating", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "an airplane engine runs"], "sample_ids": ["vSeGhaZt-aI", "yVPZ2MNWpms"], "start_seconds": ["50", "0"], "properties": ["water, bubbles, run", "engine, airplane, runs"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a car is driving by on the road "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tGcFnX0GHI", "sSMl2vc3ek"], "start_seconds": ["0", "20"], "properties": ["ring, talk, woman", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, 
{"captions": ["some people speak", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vbZ-0lGPneg", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "a, scream, girl"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a woman is speaking and a baby is crying"], "question": "which entity has a scream?", "label": 1}, {"captions": ["humming of idling and revving engine with a man speaking", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wqADXCzngMw", "uYT5gxnyMWM"], "start_seconds": ["340", "50"], "properties": ["audio, humming, revving", "female, spraying, scream"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a woman is speaking and a baby is crying"], "question": "which entity is a video", "label": 1}, {"captions": ["a man speaking with light rustling", "water flows as men speak and yell"], "sample_ids": ["zOZleIRqZm4", "vJ7JPEFhyLA"], "start_seconds": ["80", "16"], "properties": ["light, rustling, man", "water, flow, men"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 0}, {"captions": ["a man speaks while water trickles and flows", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["sapQIQUhFc", "w34HjHr6gAY"], "start_seconds": ["280", "30"], "properties": ["water, trickles, flow", "beeps, 
hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["tapping occurs then a baby cries", "a person snores loudly multiple times at a close distance"], "sample_ids": ["wIJK3-5y0kA", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["a, cry, baby", "loud, multiple, distance"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a person snores loudly multiple times at a close distance"], "sample_ids": ["tDlysoZiA1I", "sSMl2vc3ek"], "start_seconds": ["0", "20"], "properties": ["animal, grunts, chirps", "loud, multiple, distance"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", null], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a stream of water flows as people talk and wind blows"], "sample_ids": ["tEE3MpBt1sg", "xBxDz0CFVn0"], "start_seconds": ["50", "30"], "properties": ["drill, something, laugh", "stream, water, 
flow"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage is blurry and out of focus"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is a moving object", "label": 1}, {"captions": ["food is frying then a woman speaks", "waves crash against a shoreline and people speak"], "sample_ids": ["ukxt9I7eMMg", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["food, woman, speak", "wave, crash, shoreline"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a video of a woman speaking?", "label": 0}, {"captions": ["a baby laugh at a sputter", "someone is typing on a computer keyboard"], "sample_ids": ["sLUnaPT5gM8", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["laugh, sputter, baby", "keyboard, type, computer"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "how to make money on youtube in spanish"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a horse runs while two women talk", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["sdvI1mHAsc", "w5W5Kqtc8E"], "start_seconds": ["20", "100"], "properties": ["two women, horse, run", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["rain falls on a surface 
as men speak and music plays", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["w0xsN8X18Y", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["music, surface, rain", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "an engine runs loudly"], "sample_ids": ["zALy31PjDl0", "vqZuVbG6-HI"], "start_seconds": ["21", "130"], "properties": ["a man, a vehicle, a horn", "loud, engine, run"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "white noise and snoring with some rustling in the background"], "sample_ids": ["vqZuVbG6-HI", "xzKKf9bKNUo"], "start_seconds": ["130", "10"], "properties": ["background, male, female", "background, noise, snoring"], "captions_pred_video": ["footage is blurry because it's raining outside", "shows a woman laying on 
a bed with her eyes closed and her mouth open"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a person snoring loudly"], "question": "which entity has a background of noise?", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "waves crash against a shoreline and people speak"], "sample_ids": ["sQGXqGcwOTc", "yFB25fqfU8I"], "start_seconds": ["3", "300"], "properties": ["audio, kid, giggles", "wave, crash, shoreline"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of a person surfing in the ocean"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a video", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a man speaks as a car is passing by"], "sample_ids": ["wyllXV6PjKo", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a kid, talk, cry", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a car passing by?", "label": 1}, {"captions": ["multiple adults speaking, and a child shouting in the background", "an emergency vehicle engine runs then a horn blows and siren sounds"], "sample_ids": ["yks4cLgIDMc", "y2bVZ7rz-5M"], "start_seconds": ["170", "280"], "properties": ["background, speaking, child", "engine, 
horn, siren"], "captions_pred_video": ["footage of two kids wrestling on the floor", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking and a child is crying", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn?", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "a chime of a clock followed by various tones of ticking with come clinking"], "sample_ids": ["tDVADusiIoc", "uqFtmnhuqA8"], "start_seconds": ["60", "30"], "properties": ["wind, radio, waves", "a, b, c"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "shows a clock on the wall of a room with a person standing in front of it"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "mechanisms are ticking and a hammer is striking "], "question": "which entity is a clock?", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["vBslzh7saPw", "sLUnaPT5gM8"], "start_seconds": ["90", "0"], "properties": ["power, scream, increase", "loud, laughter, intermittent"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "loud, continuous burping"], "sample_ids": ["vlJS7LN2XyM", "y636gklDioE"], "start_seconds": ["30", "20"], "properties": ["background, clocks, ticking", "loud, continuous, burping"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the 
blurriness of the video", "a dog sitting on a red chair in front of an old telephone"], "captions_pred_audio": ["a ticktock of a clock", "a person burps loudly several times"], "question": "which entity is louder", "label": 1}, {"captions": ["a male speaks over some small clicks", "a man speaks followed by another man speaking outside"], "sample_ids": ["uXxVebHsGZ8", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["male, clicks, speak", "two men, speak, follow"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a person snoring several times", "people speaking indiscriminately in the distance with a person snoring loudly nearby"], "sample_ids": ["spJCm8tD9Zo", "w2JXXIAdUdg"], "start_seconds": ["90", "10"], "properties": ["snore, person, several", "snoring, distance, person"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a close up shot of a person's mouth with a toothbrush in it"], "captions_pred_audio": ["a person is snoring loudly", "a person snoring and a dog whimpering"], "question": "which person is snoring", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "a propeller rotates loudly and intensely"], "sample_ids": ["sQGXqGcwOTc", "ugHJF0hfYkg"], "start_seconds": ["3", "10"], "properties": ["cling, speak, dishes", "loud, intense, propeller"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "winds blows roughly 
as a vehicle races past"], "sample_ids": ["wz7N8YRy74I", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["rooster, crow, background, people", "wind, blows, vehicle"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a jet engine roars and wind blows "], "question": "which entity is about a vehicle racing past?", "label": 1}, {"captions": ["two frogs croak at each other", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["zg0X6BnhOLQ", "tdWhHV3X25Q"], "start_seconds": ["410", "60"], "properties": ["two frogs, croak, at each other", "applause, audience, yells"], "captions_pred_video": ["footage of lightning in the sky at night", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a frog is croaking", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "a car accelerates and wind blows"], "sample_ids": ["xSKJGCItUWE", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["engine, work, child", "accelerates, wind, blows"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["zF8yoL0rkbI", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["engine, run, someone", "a woman, laughs, animal"], "captions_pred_video": ["footage of the traffic on the street at night", "of a girl wearing a pig mask and a boy hugging her"], 
"captions_pred_audio": ["the wind is blowing hard and water is splashing", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["an infant crying as a woman laughs", "multiple people speak and children yell while water gurgles"], "sample_ids": ["xhmRY9yhC7c", "vb1fPSDI4c"], "start_seconds": ["20", "30"], "properties": ["a, laugh, infant", "multiple, people, yell"], "captions_pred_video": ["of a baby crying in a baby bouncer", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["water rushes by", "heavy rain splashes as it falls"], "sample_ids": ["x-PeY8Yb8M4", "wP8ZKrlx3oA"], "start_seconds": ["300", "40"], "properties": ["water, rushes, by", "fall, rain, splash"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a car is driving on a wet road ", "a heavy rain is falling on a surface"], "question": "which entity is falling", "label": 1}, {"captions": ["children cry and people talk", "a man speaks as a car is passing by"], "sample_ids": ["xLwHe825Zs", "sK4u5T8hW78"], "start_seconds": ["18", "30"], "properties": ["people talk, children cry, people talk", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with background noise and breathing sounds "], 
"question": "which entity is about a car passing by?", "label": 1}, {"captions": ["vehicles pass by on a roadway", "wind blows as people chatter quietly"], "sample_ids": ["tgbONvsP47Y", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["pass, vehicle, roadway", "wind, chatter, people"], "captions_pred_video": ["footage of a fire truck entering a garage", "footage is blurry and out of focus"], "captions_pred_audio": ["a car is driving on the road ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "a vehicle engine accelerating then running on idle"], "sample_ids": ["vSeGhaZt-aI", "vYkA3cfXp5Q"], "start_seconds": ["50", "30"], "properties": ["water, bubbles, speak", "engine, accelerate, idle"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "an engine is idling"], "question": "which entity is a moving object", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["w2M4i1mklOA", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["loud, chime, bell", "A, game, keyboard"], "captions_pred_video": ["footage of an antique clock", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend 
of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a man speaks and types on a computer keyboard "], "question": "which entity is playing a game", "label": 1}, {"captions": ["a horn honks and then loudly blares", "someone whistles a tune"], "sample_ids": ["wnpJndXuxLc", "sIXTftIuUgw"], "start_seconds": ["50", "90"], "properties": ["horn, honk, loud", "someone, tune, whistle"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a person whistling a song"], "question": "which is a musical instrument", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "water flows and trickles"], "sample_ids": ["wSVhSdj0F0", "tB7hWb9gTuQ"], "start_seconds": ["10", "30"], "properties": ["horn honks, keys jingle, slam", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "people applaud and hoot and chat quietly"], "sample_ids": ["vlS6YMeWAPo", "wwyfGO2J4"], "start_seconds": ["40", "90"], "properties": ["sheep, baa, birds", "people, applaud, hoot"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", null], "captions_pred_audio": ["a goat bleats and birds chirp", "people are clapping and speaking with background noise "], 
"question": "which entity is more quiet", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "a woman speaks as she rubs two objects together"], "sample_ids": ["xyL9F5VrjkE", "vzxHnu-SFEw"], "start_seconds": ["20", "80"], "properties": ["wind, blows, vehicle", "two objects, woman, speak"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a physical action", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "frogs croak and vocalize"], "sample_ids": ["tK4VlLsNxak", "yswmmRZFItk"], "start_seconds": ["120", "0"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "croak, vocalize, frog"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "a close up of a frog in the water"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a frog is 
croaking"], "question": "which entity is a frog", "label": 1}, {"captions": ["birds chirp and a pop occurs before a man speaks", "birds chirp and a dog breathes heavily"], "sample_ids": ["zuua6-5goWw", "y2ZBGpgbhHM"], "start_seconds": ["30", "30"], "properties": ["sound, pop, bird", "dog, chirp, breathe"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", null], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "birds chirping and a dog panting"], "question": "which entity is about a dog?", "label": 1}, {"captions": ["an infant crying frantically", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["zwOBqeFTgiU", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["cry, infant, frantically", "engine, idle, woman"], "captions_pred_video": ["of the baby crying in the car seat", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a baby cries loudly", "a woman is speaking and a subway train is moving "], "question": "which entity is a human", "label": 1}, {"captions": ["an engine starts and increases in power", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["zjTG0gaGCUI", "yDoT73BWsdA"], "start_seconds": ["80", "10"], "properties": ["power, increase, engine", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], 
"captions_pred_audio": ["a jet engine roars as wind blows ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["food is frying then a woman speaks", "water flows and trickles"], "sample_ids": ["ukxt9I7eMMg", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["food, woman, speak", "water, flow, trickle"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "a man speaks as a car is passing by"], "sample_ids": ["sZvwOuuPGP0", "sK4u5T8hW78"], "start_seconds": ["50", "30"], "properties": ["engine, diesel, truck", "a, car, pass"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a medium engine is running ", "a man is speaking with background noise and breathing sounds "], "question": "which is not a vehicle", "label": 1}, {"captions": ["people speak then an engine runs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["uMTTDZ2mb4", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["engine, run, people", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint 
on the ceiling"], "captions_pred_audio": ["people are talking and a car is driving by with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a person speaking and then an engine runs?", "label": 0}, {"captions": ["a railroad crossing bell rings as a train horn blows", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["tZGN5a7ybxo", "yDoT73BWsdA"], "start_seconds": ["60", "10"], "properties": ["ring, train, horn", "engine, revs, vehicle"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a train is moving and blowing its horn ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a person whistles a meandering tune", "people cheer as a vehicle engine revs"], "sample_ids": ["uFoga8sHpiw", "xjhAnI2q6hM"], "start_seconds": ["90", "6"], "properties": ["person, tune, whistle", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a bird in a cage", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a person whistles a song", "a truck is revving its engine and a man is speaking "], "question": "which entity is more likely to be in a vehicle", "label": 1}, {"captions": ["a vehicle engine runs and someone speaks", "some men converse over an engine running"], "sample_ids": ["zF8yoL0rkbI", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["engine, run, someone", "men, converse, engine"], "captions_pred_video": ["footage of the traffic on the street at night", null], "captions_pred_audio": ["the wind is blowing hard and water is splashing", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a running engine", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "several beeps are 
followed by a hit and a woman talking"], "sample_ids": ["v5P-ThUCINM", "w34HjHr6gAY"], "start_seconds": ["400", "30"], "properties": ["background, chirp, bird", "beeps, hit, woman"], "captions_pred_video": [null, "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "some men converse over an engine running"], "sample_ids": ["tDVADusiIoc", "sCiy7QS1U"], "start_seconds": ["60", "300"], "properties": ["water, radio, man", "men, converse, engine"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a man speaking over a radio as wind blows and water splashes?", "label": 0}, {"captions": ["someone is typing on a computer keyboard", "a woman speaks as she rubs two objects together"], "sample_ids": ["v0x1odnXtP0", "vzxHnu-SFEw"], "start_seconds": ["210", "80"], "properties": ["keyboard, type, computer", "two objects, woman, speak"], "captions_pred_video": ["how to make money on youtube in spanish", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup 
with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman is speaking and breathing with mechanisms in the background "], "question": "which object is being rubbed together", "label": 0}, {"captions": ["a woman talks while something is fried and objects are tapped", "water is sprayed across a hard surface"], "sample_ids": ["yajyRTUQk3U", "sQwlkXjQabo"], "start_seconds": ["400", "10"], "properties": ["a woman, something, fried", "water, spray, surface"], "captions_pred_video": ["- a woman cooking in the kitchen", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a man speaks as a car is passing by"], "sample_ids": ["xV7Mg1QucSc", "sK4u5T8hW78"], "start_seconds": ["14", "30"], "properties": ["alarm, ticktocks, laughs", "a, car, pass"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["an airplane engine runs", "wind blowing followed by a zoom"], "sample_ids": ["yVPZ2MNWpms", "vr8ZXjEBhMQ"], "start_seconds": ["0", "150"], "properties": ["engine, airplane, runs", "wind, blow, zoom"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a car is driving by on the road ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is not a zoom?", "label": 0}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a woman speaks happily and an animal chirps"], "sample_ids": ["wqZ135Ssz0", "uWAAAL4CIoc"], "start_seconds": ["60", "0"], "properties": ["man, woman, squawks", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a woman is speaking and a dog is barking "], "question": "which entity has a bird squawks accompanied by a man and woman speaking?", "label": 0}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "paper is crumpling consistently"], "sample_ids": ["tDlfY3nmx1A", "v5cSxLaHADY"], "start_seconds": ["160", "0"], "properties": ["applause, laugh, man", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking 
", "paper is crumpled and crinkled"], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a man speaks over intermittent keyboard taps"], "sample_ids": ["weDbePuc-Xc", "tw76HGONaKg"], "start_seconds": ["40", "570"], "properties": ["music, slaps, human", "audio, man, keyboard"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a woman speaks as she rubs two objects together"], "sample_ids": ["yPUYU6t3rwo", "vzxHnu-SFEw"], "start_seconds": ["370", "80"], "properties": ["birds chirp, objects are moved around, birds", "two objects, woman, speak"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "how to make a paper cup with scissors youtube how to make 
a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["insects buzz and a man speaks", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["humming of idling and revving engine with a man speaking", "a man speaks over intermittent keyboard taps"], "sample_ids": ["wqADXCzngMw", "tw76HGONaKg"], "start_seconds": ["340", "570"], "properties": ["audio, humming, revving", "audio, man, keyboard"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the 
legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over it?", "label": 0}, {"captions": ["birds tweet and squawk", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["w1mlz3Pe4fU", "tdWhHV3X25Q"], "start_seconds": ["300", "60"], "properties": ["squawk, tweet, scream", "applause, audience, yells"], "captions_pred_video": ["of a bird in a cage", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["birds are chirping and singing", "a man is speaking and a crowd is clapping"], "question": "which entity is a human activity", "label": 1}, {"captions": ["birds chirp and wind blows", "a man speaks uses a drill"], "sample_ids": ["sxIvBMSavMQ", "x5eIC7S0fbg"], "start_seconds": ["210", "60"], "properties": ["birds, chirp, wind", "A man is speaking, uses a drill, and is a tool"], "captions_pred_video": ["beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a", "a person in surgical gloves is using a needle to remove a small object from a tooth"], 
"captions_pred_audio": ["birds are chirping and insects are buzzing", "a man is speaking and using a power tool "], "question": "which entity is a tool", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a power tool runs and touches a surface"], "sample_ids": ["xfudFO976zE", "zfvPRf3chY"], "start_seconds": ["0", "290"], "properties": ["animal, bleats, cry", "power tool, run, touch"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a man is speaking while a power tool is being used "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a airplane flies overhead as a woman speaks"], "sample_ids": ["tQWGZLItBXk", "zj2R0XoFr5k"], "start_seconds": ["170", "50"], "properties": ["voice, music, whoosh", "airplane, fly, woman"], "captions_pred_video": ["worms revolution screenshots", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a person is snoring while sleeping", "water flows as men speak and yell"], "sample_ids": ["vJrjSeP17yE", "vJ7JPEFhyLA"], "start_seconds": ["40", "16"], "properties": ["a person is sleeping, snoring, person", "water, flow, men"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a person speaking and yelling?", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "waves 
crash against a shoreline and people speak"], "sample_ids": ["yYJksgsxx5U", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["audio, woman, silverware", "wave, crash, shoreline"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a video", "label": 1}, {"captions": ["a small musical boom and then birds tweet and a few dogs pant", "a person snores loudly multiple times at a close distance"], "sample_ids": ["y2ZBGpgbhHM", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["birds, tweet, pant", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds chirping and a dog panting", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "vehicles pass by on a roadway"], "sample_ids": ["uPDn2BFTHk", "tgbONvsP47Y"], "start_seconds": ["140", "0"], "properties": ["woman, laughs, speaks", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "a telephone rings followed by a woman talking"], "sample_ids": ["uYT5gxnyMWM", "tGcFnX0GHI"], "start_seconds": ["50", "0"], "properties": ["female, spraying, scream", "ring, talk, woman"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a conversation", "label": 1}, {"captions": 
["continuous snoring", "someone whistles a tune"], "sample_ids": ["sLkeqCDJIyw", "sIXTftIuUgw"], "start_seconds": ["120", "90"], "properties": ["loud, snoring, noise", "someone, tune, whistle"], "captions_pred_video": [", what is the man doing on the couch? sleeping", null], "captions_pred_audio": ["a person is snoring loudly", "a person whistling a song"], "question": "which noise is quieter", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "someone snores nearby"], "sample_ids": ["w2JXXIAdUdg", "spJCm8tD9Zo"], "start_seconds": ["10", "90"], "properties": ["snoring, distance, person", "someone snores, nearby, someone"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a person is snoring loudly"], "question": "which entity is a person?", "label": 0}, {"captions": ["leaves rustle while man speaks", "small dogs yip and bark sharply"], "sample_ids": ["zOZleIRqZm4", "v-wcQf4BDY0"], "start_seconds": ["80", "120"], "properties": ["leaves, rustle, speak", "bark, yip, sharply"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a dog barks and growls"], "question": "which entity is more quiet", "label": 0}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a man speaks and is typing on a keyboard"], "sample_ids": ["xKB8O8LTs6s", "x9JovgqUcs"], "start_seconds": ["70", "500"], "properties": ["music, radio, gunshots", "a, man, speaks, keyboard"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", null], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a 
man speaks and types on a keyboard"], "question": "which entity is typing on a keyboard?", "label": 1}, {"captions": ["a man is filing a hard object", "a propeller rotates loudly and intensely"], "sample_ids": ["vveS8HT7Uog", "ugHJF0hfYkg"], "start_seconds": ["100", "10"], "properties": ["a man, hard, object", "loud, intense, propeller"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a helicopter is flying overhead "], "question": "which object is rotating loudly", "label": 1}, {"captions": ["a person snores loudly multiple times at a close distance", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["sSMl2vc3ek", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["loud, multiple, distance", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 0}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vzceMbklWc", "yDoT73BWsdA"], "start_seconds": ["180", "10"], "properties": ["water, faucet, sink", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["water is running and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "a toilet flushes and a female speaks"], "sample_ids": ["t25U-v4k4ts", "yaln9y8I7ms"], "start_seconds": ["40", "230"], "properties": ["bees buzz, birds chirp, man speaks", 
"female, flushes, toilet"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a toilet flushes and a man speaks"], "question": "which entity is a man speaking?", "label": 0}, {"captions": ["vehicle engines race around a track as a man commentates", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sZPuqDgX2V0", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["commentator, race, track", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is speaking with wind noise in the background "], "question": "which entity is a video of a race?", "label": 0}, {"captions": ["a heavy rain falls endlessly", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wP8ZKrlx3oA", "yajyRTUQk3U"], "start_seconds": ["40", "400"], "properties": ["heavy, rain, fall", "a woman, something, fried"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman is speaking while food is frying in the background"], "question": "which entity is a video of something being fried?", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["un9VQlzgZM", "y8WEcpOlT3I"], "start_seconds": ["5", "40"], "properties": ["females, talk, laugh", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is 
about a harsh wind blowing?", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "a child speaks in closed space"], "sample_ids": ["rwTERCUno", "yW6FWLSLkx4"], "start_seconds": ["90", "40"], "properties": ["engine, idle, sputter", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["an engine is idling and vibrating", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["tDlfY3nmx1A", "vlS6YMeWAPo"], "start_seconds": ["160", "40"], "properties": ["applause, laugh, man", "sheep, baa, birds"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a telephone rings followed by a woman talking"], "sample_ids": ["uRExseg-0XI", "tGcFnX0GHI"], "start_seconds": ["210", "0"], "properties": ["woman, man, water", "ring, talk, woman"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", null], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "several insects fly while two men talk"], "sample_ids": ["wRV8yMk886E", "s-T9OVOiMLo"], "start_seconds": ["0", "330"], "properties": 
["liquid, spray, nozzle", "several, fly, men"], "captions_pred_video": ["two cars are parked in a parking lot at night", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more insects", "label": 1}, {"captions": ["a door opens and closes", "a man speaks while playing a video game on a keyboard"], "sample_ids": ["vBHyYJ8pL0", "tw76HGONaKg"], "start_seconds": ["2", "570"], "properties": ["open, close, door", "A, game, keyboard"], "captions_pred_video": [null, "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a man speaks and types on a computer keyboard "], "question": "which entity is a person", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["sQGXqGcwOTc", "wqZ135Ssz0"], "start_seconds": ["3", "60"], "properties": ["cling, speak, dishes", "two men, woman, birds"], "captions_pred_video": ["a man washing dishes in a commercial 
kitchen", null], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["people applaud and hoot and chat quietly", "wind blowing followed by a zoom"], "sample_ids": ["wwyfGO2J4", "vr8ZXjEBhMQ"], "start_seconds": ["90", "150"], "properties": ["people, applaud, hoot", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["wind blows strongly and a young man speaks", "a duck quacks continuously"], "sample_ids": ["vs65y4qmyBE", "vh30P49Po6s"], "start_seconds": ["340", "30"], "properties": ["wind, blows, strongly", "quacks, continuously, duck"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a car accelerates and wind blows"], "sample_ids": ["vlJS7LN2XyM", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["background, clocks, ticking", "accelerates, wind, blows"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "a race car accelerates and revs its engine "], "question": "which entity is more active", "label": 1}, {"captions": ["a motorcycle engine works nearby", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tOSWIURC-4", 
"w5W5Kqtc8E"], "start_seconds": ["0", "100"], "properties": ["engine, work, nearby", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a lawn mower is running ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine?", "label": 1}, {"captions": ["a clang followed by a toilet flushing", "a car accelerates and wind blows"], "sample_ids": ["wNZ5thZM7XU", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["sound, flush, toilet", "accelerates, wind, blows"], "captions_pred_video": ["footage of a toilet in a bathroom stall", null], "captions_pred_audio": ["a toilet flushes", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a person speaks over rustling leaves", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zOZleIRqZm4", "vfYTJq7nU"], "start_seconds": ["80", "130"], "properties": ["rustling, leaves, person", "rustling, ducks, quack"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a duck quacks and a woman speaks"], "question": "which entity has a person speaking over rustling leaves?", "label": 0}, {"captions": ["wind blows strongly", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["w8uLijTqtlU", "vbZ-0lGPneg"], "start_seconds": ["70", "30"], "properties": ["wind, blows, strongly", "a woman, a television program, a bird"], "captions_pred_video": ["footage is blurry and shaky", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["the wind is blowing strongly", "a woman is speaking and a dog is whimpering"], "question": "which entity is a person", "label": 1}, {"captions": ["a fly buzzes around loudly as birds 
chirp", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["uJV8NDaHqqk", "y8WEcpOlT3I"], "start_seconds": ["100", "40"], "properties": ["loud, fly, chirp", "harsh, wind, blows"], "captions_pred_video": ["a bee hive in a wooden box", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["continuous sneezing together with speech", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["x4dZyf9Gbj0", "uYT5gxnyMWM"], "start_seconds": ["130", "50"], "properties": ["continuous, sneeze, speech", "female, spraying, scream"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman sneezes and speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["an airplane engine runs", "a toilet flushes and water sputters as it drains"], "sample_ids": ["yVPZ2MNWpms", "smGI3C1NZc"], "start_seconds": ["0", "30"], "properties": ["engine, airplane, runs", "water, drain, toilet"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", null], "captions_pred_audio": ["a car is driving by on the road ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "a woman speaks happily and an animal chirps"], "sample_ids": ["xzKKf9bKNUo", "uWAAAL4CIoc"], "start_seconds": ["10", "0"], "properties": ["background, noise, snoring", "a woman, chirps, animal"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", null], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and a dog is barking "], "question": "which entity is 
more active", "label": 1}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tDVADusiIoc", "vYkA3cfXp5Q"], "start_seconds": ["60", "30"], "properties": ["water, radio, man", "engine, accelerate, idle"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a machine beeps continuously"], "sample_ids": ["y8dSeubCNI", "y682ml90jGw"], "start_seconds": ["4", "11"], "properties": ["men, women, car", "beeps, machine, continuously"], "captions_pred_video": [null, null], "captions_pred_audio": ["an engine revving and people talking in the background", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["a goat bleats and someone makes a calling noise", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vlS6YMeWAPo", "xKB8O8LTs6s"], "start_seconds": ["40", "70"], "properties": ["noise, bleat, call", "music, gunfire, explosion"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a goat bleats and birds chirp", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["zj2R0XoFr5k", "tDVADusiIoc"], "start_seconds": ["50", "60"], "properties": ["airplane, fly, woman", "water, radio, man"], "captions_pred_video": ["footage of a small 
airplane flying in the sky stock videos and royalty-free footage", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a train horn sounds as the train approaches", "a horn rings out as a machine runs by"], "sample_ids": ["slZLHwNbbt4", "slZLHwNbbt4"], "start_seconds": ["300", "300"], "properties": ["train, horn, sound", "a, horn, run"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a clock alarm sounds and gears turn", "a duck quacks continuously"], "sample_ids": ["w2M4i1mklOA", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["alarm, gears, turn", "quacks, continuously, duck"], "captions_pred_video": ["footage of an antique clock", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["the revving of an engine throttle followed by a man speaking", "wind blowing followed by a zoom"], "sample_ids": ["tezvROoo4bs", "vr8ZXjEBhMQ"], "start_seconds": ["40", "150"], "properties": ["audio, throttle, speaking", "wind, blow, zoom"], "captions_pred_video": ["footage of a busy city street with cars parked on both sides of the road", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a car accelerates and revs while a man speaks ", "wind blows and a chainsaw cuts through wood "], "question": "which 
entity is a video", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vzxHnu-SFEw", "vb1fPSDI4c"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "multiple, people, yell"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a crowd of people are talking and laughing"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "birds chirp and objects are moved around"], "sample_ids": ["uiS58TNyUiw", "yPUYU6t3rwo"], "start_seconds": ["430", "370"], "properties": ["vocalize, bird, chirp", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of the pigeon in the cage", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "insects buzz and a man speaks"], "question": "which entity is a video of birds chirping?", "label": 1}, 
{"captions": ["plastic is tapped on while someone speaks", "water drips and bubbles as a man speaks"], "sample_ids": ["wvKpEYswXO0", "vSeGhaZt-aI"], "start_seconds": ["150", "50"], "properties": ["plastic, tap, speak", "water, bubbles, speak"], "captions_pred_video": ["of the person preparing food in the kitchen", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is a liquid", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "an engine runs loudly"], "sample_ids": ["wDVMhEdTiVw", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["gun, shoot, water", "loud, engine, run"], "captions_pred_video": ["a blurry image of trees and water in the forest", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "an airplane engine spools and people speak"], "sample_ids": ["wyllXV6PjKo", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["a kid, talk, cry", "airplane, engine, spool"], "captions_pred_video": [null, "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a woman speaks and a baby cries", "a jet engine is running and people are talking"], "question": "which entity is about a kid?", "label": 0}, {"captions": ["someone snores nearby", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["spJCm8tD9Zo", "tdWhHV3X25Q"], "start_seconds": ["90", "60"], "properties": ["someone snores, nearby, someone", "applause, audience, yells"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "a man is 
talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "vehicles pass by on a roadway"], "sample_ids": ["yswmmRZFItk", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["background, frog, croak", "pass, vehicle, roadway"], "captions_pred_video": ["a close up of a frog in the water", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a frog is croaking", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a stream runs then someone speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wbHTKEJZyhc", "wz7N8YRy74I"], "start_seconds": ["20", "30"], "properties": ["stream, run, someone", "rooster, crow, background, men"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background 
video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vf9xf3vMsGM", "xfaoyyzw2WU"], "start_seconds": ["540", "180"], "properties": ["A man speaks while turning a water faucet on.", "loud, jet engine, roar"], "captions_pred_video": ["of the person washing their hands under the faucet", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking while water is running in the background", "an aircraft engine roars and a man speaks "], "question": "which entity is quieter", "label": 0}, {"captions": ["a woman speaks and is crumpling paper", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["xvDdE3zNf8Y", "zj2R0XoFr5k"], "start_seconds": ["120", "50"], "properties": ["A, crumple, paper", "airplane, boy, fly"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman speaks and crumples paper", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "a car accelerates and wind blows"], "sample_ids": ["sDSppXIlJrs", "u0TrcHhkPQ"], "start_seconds": ["27", "20"], "properties": ["microphone, water, wind", "accelerates, wind, blows"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", null], "captions_pred_audio": ["the wind is 
blowing and water is splashing", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["zsLxS-uLJTw", "w5W5Kqtc8E"], "start_seconds": ["20", "100"], "properties": ["horn, blast, train", "wind, blow, vehicle"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", null], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a vehicles accelerate quickly and someone laughs", "a person whistles a meandering tune"], "sample_ids": ["uWPRNLnpy7Y", "uFoga8sHpiw"], "start_seconds": ["10", "90"], "properties": ["accelerate, laugh, vehicle", "person, tune, whistle"], "captions_pred_video": ["is taken from a car driving down the street", "footage of a bird in a cage"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a person whistles a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a woman speaks and a baby laughs", "paper folding and crinkling"], "sample_ids": ["tOj4tdLRaA", "zPpG3RD8lSs"], "start_seconds": ["70", "20"], "properties": ["woman, laugh, baby", "paper, fold, crinkle"], "captions_pred_video": [null, "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a 
valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a baby laughs and a woman speaks", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["an adult woman and an adult man speak", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["zTLVJCo4WEE", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["two people, adult, speak", "animal, grunts, snorts"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a woman is speaking and a baby is crying"], "question": "which entity has a more snorts", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "waves crash against a shoreline and people speak"], "sample_ids": ["uRExseg-0XI", "yFB25fqfU8I"], "start_seconds": ["210", "300"], "properties": ["woman, man, water", "wave, crash, shoreline"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more active", "label": 1}, {"captions": ["male speech with light ticking", "water is sprayed across a hard surface"], "sample_ids": ["xO-Q2BlIIPU", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["male, speech, ticking", "water, spray, surface"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 
2016", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "water is sprayed across a hard surface"], "sample_ids": ["rwTERCUno", "sQwlkXjQabo"], "start_seconds": ["90", "10"], "properties": ["engine, idle, sputter", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["an engine is idling and vibrating", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a clock ticktocks"], "sample_ids": ["w2M4i1mklOA", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["loud, chime, bell", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of an antique clock", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a clock is ticking loudly"], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman speaks and other women and a man talk with her", "long loud burping by a man"], "sample_ids": ["vbpKkWvfOu4", "xmiUIOhtZyQ"], "start_seconds": ["560", "60"], "properties": ["a, woman, man", "loud, burp, man"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "homer simpson drinking a beer"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a person burps and music plays in the background "], "question": "which entity is a burp", "label": 1}, {"captions": ["women speak and laugh as wind blows", "someone 
whistles a tune"], "sample_ids": ["un9VQlzgZM", "sIXTftIuUgw"], "start_seconds": ["5", "90"], "properties": ["wind, speak, laugh", "someone, tune, whistle"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a duck quacks continuously"], "sample_ids": ["yPUYU6t3rwo", "vh30P49Po6s"], "start_seconds": ["370", "30"], "properties": ["birds chirp, objects are moved around, birds", "quacks, continuously, duck"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["insects buzz and a man speaks", "a duck is quacking loudly"], "question": "which entity is a bird", "label": 0}, {"captions": ["a woman and man are speaking", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["vbpKkWvfOu4", "uYT5gxnyMWM"], "start_seconds": ["560", "50"], "properties": ["two people, speaking, woman, man", "a, scream, girl"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a woman is speaking and a baby is crying"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a wooden clack accompanies nearby chirping birds"], "sample_ids": ["sWZzXuWYY", "yeFvk9x0wWI"], "start_seconds": ["420", "30"], "properties": ["male, clanks, thumps", "clack, bird, chirp"], "captions_pred_video": 
[null, "a mouse in a cage on the sidewalk in front of a fence"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "birds chirp in the background as a car drives by "], "question": "which entity is accompanied by birds", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a vehicle engine accelerating then running on idle"], "sample_ids": ["zgUgkpk78xU", "vYkA3cfXp5Q"], "start_seconds": ["70", "30"], "properties": ["clinking, humming, horn", "engine, accelerate, idle"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a vehicle accelerates and squeals tires"], "sample_ids": ["xjvTpk2Zpr8", "yRx9txMcBl0"], "start_seconds": ["70", "40"], "properties": ["engine, run, wind", "accelerates, tires, squeals"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods 
cars, gta"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a car is revving its engine and skidding "], "question": "which entity is moving", "label": 1}, {"captions": ["an airplane accelerates briefly", "someone sprays a liquid onto a hard surface making a hiss sound"], "sample_ids": ["zjTG0gaGCUI", "zO-LSSY92ZM"], "start_seconds": ["80", "30"], "properties": ["accelerates, airplane, briefly", "liquid, surface, sound"], "captions_pred_video": [null, "youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car's air filter youtube how to clean your car'"], "captions_pred_audio": ["a jet engine roars as wind blows ", "steam is hissing and hissing"], "question": "which entity is not a liquid?", "label": 0}, {"captions": ["speaking following by laughing and clapping", "several insects fly while two men talk"], "sample_ids": ["u2f5NpsoHBg", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["person, laugh, clap", "several, fly, men"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a man is 
speaking while insects are buzzing in the background "], "question": "which entity is a video of a person speaking and laughing and clapping?", "label": 0}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a child speaks in closed space"], "sample_ids": ["uEU-Hg5MTN8", "yW6FWLSLkx4"], "start_seconds": ["27", "40"], "properties": ["a woman, laughs, animal", "child, space, speak"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a baby cries and a woman speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tMbMDvT50j8", "zj2R0XoFr5k"], "start_seconds": ["12", "50"], "properties": ["a, cry, woman", "airplane, boy, fly"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a baby cries and a woman speaks", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a baby crying and a woman speaking?", "label": 0}, {"captions": ["dishes cling together then a man begins to speak", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sQGXqGcwOTc", "xKB8O8LTs6s"], "start_seconds": ["3", "70"], "properties": ["cling, speak, dishes", "music, gunfire, explosion"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "music plays while a woman speaks and gunshots are fired "], "question": 
"which entity has more gunfire", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "a stream of water runs briefly"], "sample_ids": ["wTjoRj1se3U", "x-PeY8Yb8M4"], "start_seconds": ["390", "300"], "properties": ["engine, run, people", "stream, water, run"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a jet engine is running and people are talking", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vYkA3cfXp5Q", "xfaoyyzw2WU"], "start_seconds": ["30", "180"], "properties": ["engine, accelerate, idle", "loud, jet engine, roar"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["an engine is idling", "an aircraft engine roars and a man speaks "], "question": "which engine is louder", "label": 1}, {"captions": ["ducks quack as a man speaks and makes a duck sound", "water pouring and bubbling"], "sample_ids": ["vfYTJq7nU", "uyRfq-jKPpo"], "start_seconds": ["130", "50"], "properties": ["ducks, quack, man", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a duck quacks and a woman speaks", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "people applaud and hoot and chat quietly"], "sample_ids": ["vlJS7LN2XyM", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["background, clocks, ticking", "people, applaud, hoot"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "people speak as gunfire rings out"], "sample_ids": ["yRx9txMcBl0", "wqTCwqVRDlk"], "start_seconds": ["40", "80"], "properties": ["accelerates, tires, squeals", "gunfire, ring, speak"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, 
{"captions": ["wind blows as people chatter quietly", "water flows and trickles"], "sample_ids": ["xBxDz0CFVn0", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["wind, chatter, people", "water, flow, trickle"], "captions_pred_video": ["footage is blurry and out of focus", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a vehicle engine accelerating then running on idle", "a clock ticktocks and sounds an alarm then a man laughs"], "sample_ids": ["vYkA3cfXp5Q", "xV7Mg1QucSc"], "start_seconds": ["30", "14"], "properties": ["engine, accelerate, idle", "alarm, ticktocks, laughs"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "a cuckoo clock hanging on the wall"], "captions_pred_audio": ["an engine is idling", "an alarm clock ticks and a woman laughs"], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["an engine runs and wind blows", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["vs65y4qmyBE", "xKB8O8LTs6s"], "start_seconds": ["340", "70"], "properties": ["engine, run, wind", "music, gunfire, explosion"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "an airplane engine spools and people speak"], "sample_ids": ["v7jJS8aAyA", "wTjoRj1se3U"], "start_seconds": ["10", "390"], "properties": ["wind, blows, loudly", "airplane, engine, spool"], "captions_pred_video": [null, "footage 
of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a jet engine is running and people are talking"], "question": "which entity is a vehicle", "label": 1}, {"captions": ["water gurgles, metal squeaks and the water stops", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["x4a9YGIw4ok", "yajyRTUQk3U"], "start_seconds": ["120", "400"], "properties": ["water, gurgles, stops", "a woman, something, fried"], "captions_pred_video": ["footage is blurry and out of focus", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a toilet flushes and water splashes", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a man speaks over intermittent keyboard taps", "a duck quacks loudly and continuously"], "sample_ids": ["tw76HGONaKg", "vh30P49Po6s"], "start_seconds": ["570", "30"], "properties": ["audio, man, keyboard", "loud, continuous, quacks"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man speaks 
and types on a computer keyboard ", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tDlysoZiA1I", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["animal, grunts, chirps", "a woman, something, fried"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "some tunes played by whistling"], "sample_ids": ["wAAkbZToh8", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["burp, laugh, speak", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man burps and a woman speaks", "a person whistling a song"], "question": "which entity is playing a tune", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "people applaud and hoot and chat quietly"], "sample_ids": ["wvKpEYswXO0", "wwyfGO2J4"], "start_seconds": ["150", "90"], "properties": ["plastic, tap, speak", "people, applaud, hoot"], "captions_pred_video": ["of the person preparing food in the kitchen", null], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "people are clapping and speaking with background noise "], "question": "which entity is a group of people", "label": 1}, {"captions": ["a person snoring", "water flows and trickles"], "sample_ids": ["t8tv5YRMJUg", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["a person, snore, loud", "water, flow, trickle"], "captions_pred_video": ["of a man getting his face licked by 
another man", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a person sniffs and breathes heavily", "water is splashing and gurgling"], "question": "which entity is not loud", "label": 1}, {"captions": ["water flows followed by women screaming", "winds blows roughly as a vehicle races past"], "sample_ids": ["w5W5Kqtc8E", "xjvTpk2Zpr8"], "start_seconds": ["100", "70"], "properties": ["water, flow, women", "wind, blows, vehicle"], "captions_pred_video": [null, "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a jet engine roars and wind blows "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "a toilet flushes and water drains"], "sample_ids": ["wtDqrBygTcU", "sfAvvZwdLCY"], "start_seconds": ["30", "20"], "properties": ["man, engine, run", "water drains, flushes, water"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking and a motor is running", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vbZ-0lGPneg", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["a woman, a television program, a bird", "airplane, boy, fly"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a boy speaking?", "label": 1}, 
{"captions": ["a woman speaks and food sizzles while frying", "a car accelerates and wind blows"], "sample_ids": ["wTideSjRFS0", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["food, sizzle, woman", "accelerates, wind, blows"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["someone snores nearby", "a woman speaks happily and an animal chirps"], "sample_ids": ["spJCm8tD9Zo", "uWAAAL4CIoc"], "start_seconds": ["90", "0"], "properties": ["someone snores, nearby, someone", "a woman, chirps, animal"], "captions_pred_video": ["of a man laying on the ground with his mouth open", null], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking and a dog is barking "], "question": "which entity is more active", "label": 1}, {"captions": ["a stream of water flows quickly", "pigeons vocalize and birds chirp"], "sample_ids": ["wbHTKEJZyhc", "uiS58TNyUiw"], "start_seconds": ["20", "430"], "properties": ["stream, water, flow", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a 
bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "of the pigeon in the cage"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a stream of water?", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sU53zg9Jp7s", "yajyRTUQk3U"], "start_seconds": ["380", "400"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "a woman, something, fried"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", "- a woman cooking in the kitchen"], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["vms5XGTDVQc", "zj2R0XoFr5k"], "start_seconds": ["220", "50"], "properties": ["paper, crumpled, crinkled", "airplane, boy, fly"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["paper is crumpled and crinkled", "a woman speaks while a helicopter flies overhead "], "question": "which entity is not flying?", "label": 0}, {"captions": ["an audience gives applause", "women speak and laugh as wind 
blows"], "sample_ids": ["x6iCUDmRpKQ", "un9VQlzgZM"], "start_seconds": ["38", "5"], "properties": ["applause, audience, give", "wind, speak, laugh"], "captions_pred_video": ["a black background with the moon and stars in the sky", null], "captions_pred_audio": ["a group of people are clapping and cheering", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is not a person?", "label": 0}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a weapon fires multiple times"], "sample_ids": ["tDlysoZiA1I", "sMC07Ucy7kg"], "start_seconds": ["0", "10"], "properties": ["animal, grunts, chirps", "weapon, fire, multiple"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage is from a car's point of view"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["women speak and laugh as wind blows", "wind blowing followed by a zoom"], "sample_ids": ["un9VQlzgZM", "vr8ZXjEBhMQ"], "start_seconds": ["5", "150"], "properties": ["wind, speak, laugh", "wind, blow, zoom"], "captions_pred_video": [null, "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more likely to be a video of a wind blowing?", "label": 1}, {"captions": ["a person sniffles and sneezes", "a clock ticks quietly and rhythmically"], "sample_ids": ["uRlbY6aoBU", "u7C-AEBQM"], "start_seconds": ["0", "30"], "properties": ["sneezes, sniffles, person", "ticks, rhythmic, quiet"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is sneezing ", "a ticktock of a clock"], "question": "which entity is silent", "label": 1}, {"captions": ["a small 
musical boom and then birds tweet and a few dogs pant", "a stream of water runs briefly"], "sample_ids": ["y2ZBGpgbhHM", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["birds, tweet, pant", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["birds chirping and a dog panting", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a child speaks in closed space", "vehicles pass by on a roadway"], "sample_ids": ["yW6FWLSLkx4", "tgbONvsP47Y"], "start_seconds": ["40", "0"], "properties": ["child, space, speak", "pass, vehicle, roadway"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a person uses a saw to cut some wood", "some men converse over an engine running"], "sample_ids": ["sHbXC6na9hg", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["a person, saw, wood", "men, converse, engine"], "captions_pred_video": ["a man using a tractor to cut a log into firewood youtube", null], "captions_pred_audio": ["an engine is idling and vibrating", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a person cutting wood?", "label": 0}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "wind loudly blowing while people speak in the background followed by a horn blowing"], "sample_ids": ["vdoxuJn9lTc", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], "properties": ["burp, loud, girl", "wind, blow, loudly"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "a school bus decorated with christmas lights is floating 
in the water"], "captions_pred_audio": ["a child speaks followed by a burp", "a truck is revving its engine and a man is speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "a woman speaks happily and an animal chirps"], "sample_ids": ["w2M4i1mklOA", "uWAAAL4CIoc"], "start_seconds": ["30", "0"], "properties": ["loud, chime, bell", "a woman, chirps, animal"], "captions_pred_video": ["footage of an antique clock", null], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a woman is speaking and a dog is barking "], "question": "which entity is quieter", "label": 1}, {"captions": ["a drill runs and two people laugh", "people speak as gunfire rings out"], "sample_ids": ["tEE3MpBt1sg", "wqTCwqVRDlk"], "start_seconds": ["50", "80"], "properties": ["two people, laugh, drill", "gunfire, ring, speak"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "water flows and trickles"], "sample_ids": ["uZesmtKZGSw", "tB7hWb9gTuQ"], "start_seconds": ["250", "30"], "properties": ["car, track, man", "water, flow, trickle"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 
1970 chevrolet", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a person sniffs and sneezes"], "sample_ids": ["sZPuqDgX2V0", "uRlbY6aoBU"], "start_seconds": ["30", "0"], "properties": ["commentator, race, track", "sneezes, person, sniffs"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a man is sneezing "], "question": "which entity is not a person?", "label": 0}, {"captions": ["two men speak as a buffeting wind blows", "a female speaks softly as paper crinkles"], "sample_ids": ["y8WEcpOlT3I", "xvDdE3zNf8Y"], "start_seconds": ["40", "120"], "properties": ["wind, speak, buffeting", "a, female, speaks"], "captions_pred_video": ["on how to use a sewing machine youtube", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman speaks and crumples paper"], "question": "which entity is speaking softly", "label": 1}, {"captions": ["a person speaks over rustling leaves", "paper is crumpling consistently"], "sample_ids": ["zOZleIRqZm4", "v5cSxLaHADY"], "start_seconds": ["80", "0"], "properties": ["rustling, leaves, person", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "some men talk among st themselves as cars speed and 
race loudly"], "sample_ids": ["vddP56-ogds", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["water, flow, laugh", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "a infant makes noise and is excited"], "sample_ids": ["tDVADusiIoc", "wIJK3-5y0kA"], "start_seconds": ["60", "30"], "properties": ["man, radio, blows", "noise, excited, infant"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a baby cries and wails as an adult female speaks", "water pouring and bubbling"], "sample_ids": ["zliInBdC98Y", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["a, baby, cries, wails", "water, bubbles, pouring"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how 
to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a baby cries and a woman speaks", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a woman speaks as she rubs two objects together"], "sample_ids": ["vSeGhaZt-aI", "vzxHnu-SFEw"], "start_seconds": ["50", "80"], "properties": ["water, sink, talk", "two objects, woman, speak"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a man is speaking and pouring liquid with 
background noise ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a man talking?", "label": 0}, {"captions": ["wind blows and people talk while livestock vocalizes", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vXlk0lIQBFo", "vb1fPSDI4c"], "start_seconds": ["470", "30"], "properties": ["wind, talk, vocalize", "multiple, people, yell"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", null], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a crowd of people are talking and laughing"], "question": "which entity has more people", "label": 1}, {"captions": ["a person is snoring while sleeping", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["vJrjSeP17yE", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["a person is sleeping, snoring, person", "a woman, laughs, animal"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 0}, {"captions": ["some men converse over an engine running", "a man speaks as a car is passing by"], "sample_ids": ["sCiy7QS1U", "sK4u5T8hW78"], "start_seconds": ["300", "30"], "properties": ["men, converse, engine", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man speaks while a 
motorcycle revs and accelerates ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a goat screams and people speak in the background", "some tunes played by whistling"], "sample_ids": ["xC8kbrKJmco", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["background, goat, scream", "tune, play, whistling"], "captions_pred_video": [null, "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a goat is bleating ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a male speaks over some small clicks", "a clock ticktocks"], "sample_ids": ["uXxVebHsGZ8", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["male, clicks, speak", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["tapping occurs then a baby cries", "an infant crying frantically"], "sample_ids": ["wIJK3-5y0kA", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["a, cry, baby", "cry, infant, frantically"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "of the baby crying in the car seat"], "captions_pred_audio": ["a baby cries and a woman speaks", "a baby cries loudly"], "question": "which entity is crying frantically", "label": 1}, {"captions": ["a clock ticktocks in wind", "waves crash against a shoreline and people speak"], "sample_ids": ["yVumC9TGknc", "yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["ticktocks, clock, wind", "wave, crash, shoreline"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage of a person surfing in the ocean"], 
"captions_pred_audio": ["a series of beeps and chirps", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["a person snoring", "a man talks as something metal hits against and glass is set down"], "sample_ids": ["t8tv5YRMJUg", "x6ijhqRY38s"], "start_seconds": ["0", "250"], "properties": ["a person, snore, loud", "something metal, glass, hit"], "captions_pred_video": ["of a man getting his face licked by another man", "a chef preparing a dish with a bottle of wine and a plate of food on a table"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a man is speaking and dishes are clanging "], "question": "which entity is not a person?", "label": 1}, {"captions": ["a heavy rain falls endlessly", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wP8ZKrlx3oA", "vYkA3cfXp5Q"], "start_seconds": ["40", "30"], "properties": ["heavy, rain, fall", "engine, accelerate, idle"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a heavy rain is falling on a surface", "an engine is idling"], "question": "which entity is a moving object", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["yNtRmrn0io8", "uEU-Hg5MTN8"], "start_seconds": ["210", "27"], "properties": ["storm, distance, strike", "a woman, laughs, animal"], "captions_pred_video": ["footage of a house in the middle of the night", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["rain falls and thunder roars", "a woman is speaking and a baby is crying"], "question": "which entity is more likely to be a natural occurrence", "label": 0}, {"captions": ["a vehicle engine runs while a woman makes an announcement", 
"two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["su6FAOcOA8c", "wqZ135Ssz0"], "start_seconds": ["4", "60"], "properties": ["engine, run, woman", "two men, woman, birds"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", null], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has a woman making an announcement?", "label": 0}, {"captions": ["a woman speaks and a baby laughs", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tOj4tdLRaA", "vfYTJq7nU"], "start_seconds": ["70", "130"], "properties": ["woman, laugh, baby", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby laughs and a woman speaks", "a duck quacks and a woman speaks"], "question": "which entity is about a baby?", "label": 0}, {"captions": ["a man speaks as a car is passing by", "a man speaks as a car is passing by"], "sample_ids": ["sK4u5T8hW78", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["a, car, pass", "a, car, pass"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 
1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a photograph", "label": 1}, {"captions": ["water runs into a sink while men speak", "dishes cling together then a man begins to speak"], "sample_ids": ["vzceMbklWc", "sQGXqGcwOTc"], "start_seconds": ["180", "3"], "properties": ["water, sink, run", "cling, speak, dishes"], "captions_pred_video": [null, "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["water is running and a man is speaking", "mechanisms are operating and water is splashing "], "question": "which entity is about a sink?", "label": 0}, {"captions": ["birds coo incessantly", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["yZrFNS7GFBQ", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["coo, bird, incessant", "music, gunfire, explosion"], "captions_pred_video": ["of the bird in the cage", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["an owl hoots in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "water flows as men speak and yell"], "sample_ids": ["vzxHnu-SFEw", "vJ7JPEFhyLA"], "start_seconds": ["80", "16"], "properties": ["two objects, woman, speak", "water, flow, men"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper 
cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing?", "label": 1}, {"captions": ["a person is snoring while sleeping", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["vJrjSeP17yE", "vlS6YMeWAPo"], "start_seconds": ["40", "40"], "properties": ["a person is sleeping, snoring, person", "sheep, baa, birds"], "captions_pred_video": ["a black background with a small plane flying in the sky", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a person snoring loudly", "a goat bleats and birds chirp"], "question": "which entity is a person", "label": 0}, {"captions": ["an engine idles quietly then gradually becomes louder", "humming and rattling of an engine idling as it revs"], "sample_ids": ["vbr9mHKc8WM", "xMXvkIcaG0Y"], "start_seconds": ["40", "30"], "properties": ["noise, loudness, engine", "sound, humming, rattling"], "captions_pred_video": [null, "footage of a car's hood being opened up to reveal the engine underneath the hood"], "captions_pred_audio": ["an engine is idling", "an engine is revving and accelerating "], "question": "which entity is quieter", "label": 0}, {"captions": ["a woman speaks and then a man speaks", "a man speaks and a rooster crows 
while men talk in the background"], "sample_ids": ["vbpKkWvfOu4", "wz7N8YRy74I"], "start_seconds": ["560", "30"], "properties": ["a, man, speaks", "rooster, crow, background, men"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a person is whistling a tune", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["scYRUkrFLiQ", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["a, tune, whistle", "a woman, a television program, a bird"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a person whistling a song", "a woman is speaking and a dog is whimpering"], "question": "which entity is a person?", "label": 0}, {"captions": ["repeated tapping is accompanied by water running and a woman speaking softly", "a infant makes noise and is excited"], "sample_ids": ["wvKpEYswXO0", "wIJK3-5y0kA"], "start_seconds": ["150", "30"], "properties": ["sound, water, running", "noise, excited, infant"], "captions_pred_video": ["of the person preparing food in the kitchen", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a baby cries and a woman speaks"], "question": "which entity is making noise", "label": 1}, {"captions": ["birds chirp and an owl hoots before a man speaks briefly", "loud intermittent 
buzzing with intermittent laughter"], "sample_ids": ["wRBHTgrbiwg", "sLUnaPT5gM8"], "start_seconds": ["50", "0"], "properties": ["bird, owl, speak", "loud, laughter, intermittent"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a jet engine roars, almost making a man inaudible", "a vehicle engine runs and someone speaks"], "sample_ids": ["xfaoyyzw2WU", "zF8yoL0rkbI"], "start_seconds": ["180", "30"], "properties": ["loud, jet engine, roar", "engine, run, someone"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "footage of the traffic on the street at night"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "the wind is blowing hard and water is splashing"], "question": "which entity is quieter", "label": 1}, {"captions": ["a guy speaks with birds chirping in the background", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["v5P-ThUCINM", "y2bVZ7rz-5M"], "start_seconds": ["400", "280"], "properties": ["background, chirp, bird", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a truck is honking its horn and a siren is blaring "], "question": "which entity has a horn honking?", "label": 1}, {"captions": ["wind noise takes place into a microphone while rustling occurs", "rain falls on a surface as men speak and thunder roars"], "sample_ids": ["w8uLijTqtlU", "w0xsN8X18Y"], "start_seconds": ["70", "30"], "properties": ["wind, microphone, noise", "rain, thunder, surface"], "captions_pred_video": ["footage is blurry and shaky", null], "captions_pred_audio": ["the 
wind is blowing strongly", "a man is speaking while a motorboat is moving in the background "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["ylpYOorfH4o", "sLUnaPT5gM8"], "start_seconds": ["410", "0"], "properties": ["engine, running, wind", "loud, laughter, intermittent"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 15", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking and an engine is revving", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "paper is crumpling consistently"], "sample_ids": ["sjlVMgdGSK0", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["accelerates, vehicle, race car", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a car accelerates and revs its engine ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["an engine runs and wind blows", "a 
vehicle engine accelerates and wind blows"], "sample_ids": ["vs65y4qmyBE", "wudZTNBtVqc"], "start_seconds": ["340", "60"], "properties": ["engine, run, wind", "accelerates, engine, wind"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage is of a parking lot with cars parked in it"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a woman sneezes then speaks", "vehicles pass by on a roadway"], "sample_ids": ["x4dZyf9Gbj0", "tgbONvsP47Y"], "start_seconds": ["130", "0"], "properties": ["sneezes, speaks, woman", "pass, vehicle, roadway"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a woman sneezes and speaks", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks uses a drill", "a car accelerates and wind blows"], "sample_ids": ["x5eIC7S0fbg", "u0TrcHhkPQ"], "start_seconds": ["60", "20"], "properties": ["A man is speaking, uses a drill, and is a tool", "accelerates, wind, blows"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", null], "captions_pred_audio": ["a man is speaking and using a power tool ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "a man speaks as a motor runs in the background"], "sample_ids": ["wqZ135Ssz0", "xZepNM9qcRA"], "start_seconds": ["60", "30"], "properties": ["two men, woman, birds", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a man speaks while a 
motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "someone whistles a tune"], "sample_ids": ["viuTg1M-dqg", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["two men, speak, follow", "someone, tune, whistle"], "captions_pred_video": ["footage of water coming out of a hole in the ground", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a person whistling a song"], "question": "which is not a musical instrument", "label": 0}, {"captions": ["birds chirp then an animal grunts", "women speak as water runs briefly, children call out, and a man speaks"], "sample_ids": ["tDlysoZiA1I", "uRExseg-0XI"], "start_seconds": ["0", "210"], "properties": ["animal, grunt, chirp", "woman, man, water"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking while water is running and birds are chirping "], "question": "which entity has more people", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "multiple people speak and children yell while water gurgles"], "sample_ids": ["t97k0cejSQE", "vb1fPSDI4c"], "start_seconds": ["250", "30"], "properties": ["bird, chirp, insect", "multiple, people, yell"], "captions_pred_video": ["a bee on a purple thistle flower", null], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["females talk and laugh over gusting wind", "a stream of water flows as people talk and wind blows"], "sample_ids": ["un9VQlzgZM", "xBxDz0CFVn0"], "start_seconds": ["5", "30"], "properties": ["females, talk, laugh", 
"stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "a heavy rain falls endlessly"], "sample_ids": ["tiDFTC-5vU", "wP8ZKrlx3oA"], "start_seconds": ["30", "40"], "properties": ["male, duck, laugh", "heavy, rain, fall"], "captions_pred_video": [null, "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a heavy rain is falling on a surface"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["an insect buzzes around continuously", "a car speeding up in the distance"], "sample_ids": ["v25l1jef3JY", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["buzzes, continuously, insect", "distance, car, speed"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["low humming with a clock ticking and birds chirping", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["yVumC9TGknc", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["humming, clock, birds", "rustling, ducks, quack"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", null], "captions_pred_audio": ["a series of beeps and chirps", "a duck quacks and a woman speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a guy speaks with birds chirping 
in the background", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["v5P-ThUCINM", "wDVMhEdTiVw"], "start_seconds": ["400", "30"], "properties": ["background, chirp, bird", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and birds are chirping", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["xyL9F5VrjkE", "uZesmtKZGSw"], "start_seconds": ["20", "250"], "properties": ["wind, motor, distance", "men, talk, cars"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["scraping and female speech with distant music", "a drill runs and two people laugh"], "sample_ids": ["yHeVV-xeOxQ", "tEE3MpBt1sg"], "start_seconds": ["130", "50"], "properties": ["female, speech, music", "two people, laugh, drill"], "captions_pred_video": ["of a girl milking a goat's udder", "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["a 
woman is speaking and scraping sounds are heard in the background ", "people are laughing breathing and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sOa7g-44Dag", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["audio, scratching, man", "a woman, something, fried"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "an engine sputters followed by a car zooming by"], "sample_ids": ["zl9Dqx-j7q4", "u5RmF3c3Aw"], "start_seconds": ["6", "60"], "properties": ["motors rev, laugh, loudly", "engine, car, zoom"], "captions_pred_video": ["footage of a man driving a car in the dark", null], "captions_pred_audio": ["a jet engine roars ", "a race car accelerates and skids with wind noise in the background "], "question": "which entity is a car?", "label": 1}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vmrxwuAMb2I", "tdWhHV3X25Q"], "start_seconds": ["40", "60"], "properties": ["a dog, inhales, exhales", "applause, audience, yells"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a dog barks and growls", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "small 
dogs yip and bark sharply"], "sample_ids": ["sEprKHm8Sj8", "v-wcQf4BDY0"], "start_seconds": ["90", "120"], "properties": ["car, tires, slows", "bark, yip, sharply"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "a person is snoring while sleeping"], "sample_ids": ["tK4VlLsNxak", "vJrjSeP17yE"], "start_seconds": ["120", "40"], "properties": ["a, dial, telephone", "a person is sleeping, snoring, person"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a person snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a man rubs two objects together then speaks", "a motorcycle engine works nearby"], "sample_ids": ["vveS8HT7Uog", "tOSWIURC-4"], "start_seconds": ["100", "0"], "properties": ["a man, objects, speak", "engine, work, nearby"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a lawn mower is running "], "question": "which object is moving", "label": 1}, {"captions": ["a person snoring", "vehicles pass by on a roadway"], "sample_ids": ["t8tv5YRMJUg", "tgbONvsP47Y"], "start_seconds": ["0", "0"], "properties": ["a person, snore, loud", "pass, vehicle, roadway"], 
"captions_pred_video": ["of a man getting his face licked by another man", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["someone whistles a tune", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sIXTftIuUgw", "zj2R0XoFr5k"], "start_seconds": ["90", "50"], "properties": ["someone, tune, whistle", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person whistling a song", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "someone whistles a tune"], "sample_ids": ["wy1eKjR7KC0", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["people, talk, distance", "someone, tune, whistle"], "captions_pred_video": ["two police officers riding motorcycles down the street", null], "captions_pred_audio": ["a man is speaking and a siren is going off", "a person whistling a song"], "question": "which is a musical instrument", "label": 1}, {"captions": ["a man speaks, then dials a rotary telephone", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["tK4VlLsNxak", "uZesmtKZGSw"], "start_seconds": ["120", "250"], "properties": ["a, dial, telephone", "men, talk, cars"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about a man speaking and dialing a rotary telephone?", "label": 0}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sofxkNWaP0s", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["wind, engine, louder", "People, motor, brakes"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", null], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a man speaks while water drains", "a muffled toilet flushes and the water drains"], "sample_ids": ["vSeGhaZt-aI", "sfAvvZwdLCY"], "start_seconds": ["50", "20"], "properties": ["water, drain, man", "flushes, drains, water"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a toilet is flushed"], "question": "which entity has more water", "label": 1}, {"captions": ["a bus engine idles while a woman speaks making an announcement", "a saw finishes running as metal clings in the background"], "sample_ids": ["su6FAOcOA8c", "zofjfKhqLk8"], "start_seconds": ["4", "10"], "properties": ["engine, idle, woman", "background, metal, clings"], "captions_pred_video": ["shows a group of people 
riding on a crowded subway train", "footage of a man using a machine to cut a piece of wood"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a large engine is running and a bell is ringing"], "question": "which entity is about a saw?", "label": 1}, {"captions": ["a woman speaks as frying food sizzles", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wTideSjRFS0", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["food, sizzle, woman", "applause, audience, yells"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "birds chirp in the background while a horse neighs followed by a girl speaking"], "sample_ids": ["vSeGhaZt-aI", "s59PfAghdkM"], "start_seconds": ["50", "0"], "properties": ["water, bubbles, speak", "bird, chirp, background, horse, neigh"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "is an anime scene featuring two people and a horse in the foreground and a fence in the background"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "birds are chirping a horse is neighing and a woman is speaking "], "question": "which entity has a horse in it?", "label": 1}, {"captions": ["birds chirp as a bell rings", "bees buzz and wind blows"], "sample_ids": ["ziUT9IFTkjg", "tMJne1a4AFI"], "start_seconds": ["10", "0"], "properties": ["chirp, bell, ring", "bees buzz, wind blows, bees"], "captions_pred_video": [null, "a swarm of bees on the ground"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a swarm of bees buzzing around"], "question": 
"which entity is buzzing", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "an infant crying frantically"], "sample_ids": ["yYEVLuqEytU", "zwOBqeFTgiU"], "start_seconds": ["40", "30"], "properties": ["grunt, slurp, background", "cry, infant, frantically"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "of the baby crying in the car seat"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a baby cries loudly"], "question": "which entity is a baby", "label": 1}, {"captions": ["people speak and tapping occurs", "a man speaking with light rustling"], "sample_ids": ["tFCUUGdREgA", "zOZleIRqZm4"], "start_seconds": ["70", "80"], "properties": ["people, tap, speak", "light, rustling, man"], "captions_pred_video": ["a person riding a white horse in an indoor arena", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a man is speaking and walking with wind noise in the background ", "a man is speaking with crickets chirping in the background"], "question": "which entity is more quiet", "label": 1}, {"captions": ["dogs bark as an engine runs and a person whistles", "a woman speaks happily and an animal chirps"], "sample_ids": ["zY3icUyMdh8", "uWAAAL4CIoc"], "start_seconds": ["20", "0"], "properties": ["dog, bark, engine", "a woman, chirps, animal"], "captions_pred_video": ["footage of a bus driving through a residential street at night", null], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "birds chirp and objects are moved around"], "sample_ids": ["yZrFNS7GFBQ", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["pigeon, buzzes, insect", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["of the bird in the cage", 
"footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["an owl hoots in the background ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "music plays and a woman speaks on a radio before gunshots are fired"], "sample_ids": ["wqZ135Ssz0", "xKB8O8LTs6s"], "start_seconds": ["60", "70"], "properties": ["two men, woman, birds", "music, radio, gunshots"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has a woman speaking on a radio?", "label": 1}, {"captions": ["a man speaks as a boat engine runs", "vehicles pass by on a roadway"], "sample_ids": ["wtDqrBygTcU", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["man, engine, run", "pass, vehicle, roadway"], "captions_pred_video": ["shows a person riding on the back of a boat as it speeds through the water", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and a motor is running", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["someone whistles a song", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sIXTftIuUgw", "uZesmtKZGSw"], "start_seconds": ["90", "250"], "properties": ["someone, song, whistle", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a person whistling a song", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["someone is typing on a computer keyboard", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["v0x1odnXtP0", "su6FAOcOA8c"], "start_seconds": ["210", "4"], "properties": ["keyboard, type, computer", "engine, idle, woman"], "captions_pred_video": ["how to make money on youtube in spanish", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a person is typing on a keyboard", "a woman is speaking and a subway train is moving "], "question": "which is not a person", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "paper is crumpling consistently"], "sample_ids": ["y8dSeubCNI", "v5cSxLaHADY"], "start_seconds": ["4", "0"], "properties": ["men, women, car", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["an engine revving and people talking in the background", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["an audience gives applause", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["x6iCUDmRpKQ", "tDVADusiIoc"], "start_seconds": ["38", "60"], "properties": ["applause, audience, give", "water, radio, man"], "captions_pred_video": ["a black background with the moon and stars in the sky", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a group of people are clapping and cheering", "a man is speaking while the wind is blowing and 
water is splashing"], "question": "which is not a person", "label": 0}, {"captions": ["waves crash against a shoreline and people speak", "a clock ticktocks"], "sample_ids": ["yFB25fqfU8I", "v-g-j2uTByM"], "start_seconds": ["300", "30"], "properties": ["wave, crash, shoreline", "ticktocks, clock, ticktocks"], "captions_pred_video": ["footage of a person surfing in the ocean", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and laughing while water is splashing and gurgling", "a clock is ticking loudly"], "question": "which entity is a clock?", "label": 1}, {"captions": ["children cry and people talk", "paper is crumpling consistently"], "sample_ids": ["xLwHe825Zs", "v5cSxLaHADY"], "start_seconds": ["18", "0"], "properties": ["people talk, children cry, people talk", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a baby cries and a woman speaks", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["sK4u5T8hW78", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["a, car, pass", "wind, blow, vehicle"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a 
vehicle engine running?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a sleeping person snores and wheezes"], "sample_ids": ["sfAvvZwdLCY", "spJCm8tD9Zo"], "start_seconds": ["20", "90"], "properties": ["water drains, flushes, water", "snores, wheezes, sleeps"], "captions_pred_video": ["footage of the toilet in the bathroom", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a toilet is flushed", "a person is snoring loudly"], "question": "which entity is a source of noise", "label": 1}, {"captions": ["a machine beeps continuously", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["y682ml90jGw", "yDoT73BWsdA"], "start_seconds": ["11", "10"], "properties": ["beeps, machine, continuously", "engine, revs, vehicle"], "captions_pred_video": [null, "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a beeping sound is being made ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "pigeons vocalize and birds chirp"], "sample_ids": ["yJ0TePmaOo", "uiS58TNyUiw"], "start_seconds": ["390", "430"], "properties": ["two hard objects, man, speak", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a infant makes noise and is excited", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wIJK3-5y0kA", "vb1fPSDI4c"], "start_seconds": ["30", "30"], "properties": ["noise, excited, infant", "multiple, people, yell"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a crowd of people are talking and laughing"], "question": "which 
entity is more quiet", "label": 0}, {"captions": ["a dog whimpers as someone inhales/exhales briefly", "an insect buzzes around continuously"], "sample_ids": ["vmrxwuAMb2I", "v25l1jef3JY"], "start_seconds": ["40", "0"], "properties": ["a dog, inhales, exhales", "buzzes, continuously, insect"], "captions_pred_video": ["of the dog laying on the bed with his head out of the blanket", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a dog barks and growls", "a fly is buzzing around a microphone "], "question": "which entity is not a dog?", "label": 1}, {"captions": ["a helicopter engine idles continuously", "vehicles pass by on a roadway"], "sample_ids": ["ugHJF0hfYkg", "tgbONvsP47Y"], "start_seconds": ["10", "0"], "properties": ["engine, idle, continuously", "pass, vehicle, roadway"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a helicopter is flying overhead ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "people speak as gunfire rings out"], "sample_ids": ["tIY7qOV3rEM", "wqTCwqVRDlk"], "start_seconds": ["0", "80"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "gunfire, ring, speak"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a flush is followed by gurgling water, then another flush", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["tqR406bGiE", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["flush, water, gurgle", "People, motor, brakes"], 
"captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is flushed", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about water?", "label": 0}, {"captions": ["water flows as men speak and yell", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vJ7JPEFhyLA", "w5W5Kqtc8E"], "start_seconds": ["16", "100"], "properties": ["water, flow, men", "wind, blow, vehicle"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a toilet flushes and water drains", "a motor noise is accompanied by a door opening and closing"], "sample_ids": ["sfAvvZwdLCY", "vBHyYJ8pL0"], "start_seconds": ["20", "2"], "properties": ["water drains, flushes, water", "noise, door, opening"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is accompanied by a door opening and closing?", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a duck quacks continuously"], "sample_ids": ["uRExseg-0XI", "vh30P49Po6s"], "start_seconds": ["210", "30"], "properties": ["woman, man, water", "quacks, continuously, duck"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man is filing a hard object", "an infant crying as a 
woman laughs"], "sample_ids": ["vveS8HT7Uog", "xhmRY9yhC7c"], "start_seconds": ["100", "20"], "properties": ["a man, hard, object", "a, laugh, infant"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a baby cries and a woman speaks"], "question": "which object is harder to file", "label": 0}, {"captions": ["an aircraft engine runs as people speak", "a man speaks as a motor runs in the background"], "sample_ids": ["wTjoRj1se3U", "xZepNM9qcRA"], "start_seconds": ["390", "30"], "properties": ["engine, run, people", "background, motor, run"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a jet engine is running and people are talking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["zofjfKhqLk8", "sLUnaPT5gM8"], "start_seconds": ["10", "0"], "properties": ["noise, stop, motor", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a toilet flushes and a female speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yaln9y8I7ms", "tDVADusiIoc"], "start_seconds": ["230", "60"], "properties": ["female, flushes, toilet", "water, radio, man"], 
"captions_pred_video": ["footage is blurry and out of focus", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has a female speaking?", "label": 0}, {"captions": ["a person is snoring while sleeping", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vJrjSeP17yE", "vfYTJq7nU"], "start_seconds": ["40", "130"], "properties": ["a person is sleeping, snoring, person", "rustling, ducks, quack"], "captions_pred_video": ["a black background with a small plane flying in the sky", null], "captions_pred_audio": ["a person snoring loudly", "a duck quacks and a woman speaks"], "question": "which entity is a video", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["tQWGZLItBXk", "uYT5gxnyMWM"], "start_seconds": ["170", "50"], "properties": ["music, kid, speak", "a, scream, girl"], "captions_pred_video": ["worms revolution screenshots", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman is speaking and a baby is crying"], "question": "which entity is followed by a scream", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["xzKKf9bKNUo", "w5W5Kqtc8E"], "start_seconds": ["10", "100"], "properties": ["background, noise, snoring", "wind, blow, vehicle"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", null], "captions_pred_audio": ["a person snoring loudly", "a motorboat is moving and people are shouting and cheering "], "question": "which 
entity is a vehicle?", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "water splashes as an animal walks through"], "sample_ids": ["vddP56-ogds", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["water, flow, laugh", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["water running down a sink while a man is talking", "birds chirp and objects are moved around"], "sample_ids": ["vSeGhaZt-aI", "yPUYU6t3rwo"], "start_seconds": ["50", "370"], "properties": ["water, sink, talk", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["an animal hissing followed by a man mumbling then a pig oinking while birds chirp in the background", "a helicopter engine idles continuously"], "sample_ids": ["yYEVLuqEytU", "ugHJF0hfYkg"], "start_seconds": ["40", "10"], "properties": ["animal, pig, background", "engine, idle, continuously"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["several sheep bleat and a man speaks", "a helicopter is flying overhead "], "question": "which entity is silent", "label": 1}, {"captions": ["a person screams glaringly", "a stream of water runs briefly"], "sample_ids": ["xC8kbrKJmco", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["glaringly, screams, person", "stream, water, run"], 
"captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a goat is bleating ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "a man speaks as crickets sing"], "sample_ids": ["sLUnaPT5gM8", "ryFDPxgDOGc"], "start_seconds": ["0", "570"], "properties": ["loud, laughter, intermittent", "a, crickets, sing"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "a group of people dressed in camouflage and hunting gear in the dark"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a man is speaking with crickets chirping in the background"], "question": "which entity is quieter", "label": 1}, {"captions": ["an aircraft engine runs", "water flows and trickles"], "sample_ids": ["yLCORCnd35Q", "tB7hWb9gTuQ"], "start_seconds": ["0", "30"], "properties": ["engine, aircraft, runs", "water, flow, trickle"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "some men converse over an engine running"], "sample_ids": ["u--KhUW8l1Y", "sCiy7QS1U"], "start_seconds": ["0", "300"], "properties": ["horn, siren, life", "men, converse, engine"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", null], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "a few ducks quack and 
scamper and a man speaks"], "sample_ids": ["zliInBdC98Y", "w2bYrCVLT60"], "start_seconds": ["30", "120"], "properties": ["a, baby, cries, wails", "ducks, speak, quack"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "of the ducks drinking from a pink pool in the grass"], "captions_pred_audio": ["a baby cries and a woman speaks", "ducks are quacking and a man is speaking"], "question": "which entity is speaking", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a clock ticktocks"], "sample_ids": ["sa6TLVbooCc", "v-g-j2uTByM"], "start_seconds": ["240", "30"], "properties": ["people, laugh, child", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["someone whistles a tune", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sIXTftIuUgw", "zj2R0XoFr5k"], "start_seconds": ["90", "50"], "properties": ["someone, tune, whistle", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a person whistling a song", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["a door opens and birds chirp", "a man speaks as a car is passing by"], "sample_ids": ["yeFvk9x0wWI", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["door, open, birds", "a, car, pass"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["people cheer as a vehicle engine revs", "someone snores nearby"], "sample_ids": ["xjhAnI2q6hM", "spJCm8tD9Zo"], "start_seconds": ["6", "90"], "properties": ["engine revs, vehicle, people", "someone snores, nearby, someone"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a person is snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "waves crash against a shoreline and people speak"], "sample_ids": ["v0x1odnXtP0", "yFB25fqfU8I"], "start_seconds": ["210", "300"], "properties": ["keyboard, type, computer", "wave, crash, shoreline"], "captions_pred_video": ["how to make money on youtube in spanish", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a person is typing on a keyboard", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which is a natural phenomenon", "label": 1}, {"captions": ["rustling leaves with some light squeaking and wind blowing hard followed by a light smack", "a stream of water flows as people talk and wind blows"], "sample_ids": ["yI-KvObbDoY", "xBxDz0CFVn0"], "start_seconds": ["260", "30"], "properties": ["sound, smack, wind", "stream, water, flow"], "captions_pred_video": ["of a man hanging clothes on a clothesline in the backyard", "footage 
is blurry and out of focus"], "captions_pred_audio": ["the wind is blowing and footsteps are walking ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "people speak in a closed space"], "sample_ids": ["sYITalLZjj4", "sTpirNYo8vQ"], "start_seconds": ["30", "30"], "properties": ["water, rushes, background, birds", "people, space, speak"], "captions_pred_video": ["two ducks are swimming in the water near each other", "of a man taking a selfie on a bus"], "captions_pred_audio": ["wind blows and birds chirp", "a man is speaking while a car is revving and accelerating "], "question": "which entity is more quiet", "label": 0}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "water flows and trickles"], "sample_ids": ["uC9dtII1KDI", "tB7hWb9gTuQ"], "start_seconds": ["150", "30"], "properties": ["wind, gusts, distance", "water, flow, trickle"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "water is splashing and gurgling"], "question": "which entity is more likely to be a natural phenomenon", "label": 1}, {"captions": ["paper folding and crinkling", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zPpG3RD8lSs", "yajyRTUQk3U"], "start_seconds": ["20", "400"], "properties": ["paper, fold, crinkle", "a woman, something, fried"], "captions_pred_video": ["how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of 
paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's", "- a woman cooking in the kitchen"], "captions_pred_audio": ["the wind blows and a mouse clicks ", "a woman is speaking while food is frying in the background"], "question": "which entity is about food?", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "someone is typing on a computer keyboard"], "sample_ids": ["vYkA3cfXp5Q", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["speed, idle, accelerate", "keyboard, type, computer"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "how to make money on youtube in spanish"], "captions_pred_audio": ["an engine is idling", "a person is typing on a keyboard"], "question": "which is a person", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a toilet flushes and a female speaks"], "sample_ids": ["vddP56-ogds", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["water, splash, person, laugh", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["v7jJS8aAyA", "wDVMhEdTiVw"], "start_seconds": ["10", "30"], "properties": ["wind, blows, loudly", 
"gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not followed by water sloshing nearby?", "label": 0}, {"captions": ["a vehicle engine accelerates and wind blows", "a toilet flushes and a female speaks"], "sample_ids": ["wudZTNBtVqc", "yaln9y8I7ms"], "start_seconds": ["60", "230"], "properties": ["accelerates, engine, wind", "female, flushes, toilet"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", "footage is blurry and out of focus"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a toilet flushes and a man speaks"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["sQwlkXjQabo", "sLUnaPT5gM8"], "start_seconds": ["10", "0"], "properties": ["liquid, surface, spray", "loud, laughter, intermittent"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["spraying followed by silence", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["an adult male speaks and dials a rotary phone", "an airplane engine runs"], "sample_ids": ["tK4VlLsNxak", "yVPZ2MNWpms"], "start_seconds": ["120", "0"], "properties": ["An adult male speaks, dials, and speaks into a rotary phone", "engine, airplane, runs"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a car is driving by on the road "], 
"question": "which entity is a moving object", "label": 1}, {"captions": ["a man woman speak while crickets sing", "a clock ticktocks"], "sample_ids": ["zTLVJCo4WEE", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["a, crickets, sing", "ticktocks, clock, ticktocks"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["women speak and laugh as wind blows", "water flows and trickles"], "sample_ids": ["un9VQlzgZM", "tB7hWb9gTuQ"], "start_seconds": ["5", "30"], "properties": ["wind, speak, laugh", "water, flow, trickle"], "captions_pred_video": [null, "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "water is splashing and gurgling"], "question": "which entity is moving", "label": 1}, {"captions": ["water runs from a faucet while some men speak and the water runs in the sink", "vehicles pass by on a roadway"], "sample_ids": ["vzceMbklWc", "tgbONvsP47Y"], "start_seconds": ["180", "0"], "properties": ["water, faucet, sink", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["water is running and a man is speaking", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man talks as something metal hits against and glass is set down", "a child speaks in closed space"], "sample_ids": ["x6ijhqRY38s", "yW6FWLSLkx4"], "start_seconds": ["250", "40"], "properties": ["something metal, glass, hit", "child, space, speak"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "of the doll sitting on the bed with her hand 
outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["an insect buzzes around continuously", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["v25l1jef3JY", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["buzzes, continuously, insect", "animal, grunts, snorts"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a woman is speaking and a baby is crying"], "question": "which entity is not a person?", "label": 0}, {"captions": ["an audience gives applause", "dishes cling together then a man begins to speak"], "sample_ids": ["x6iCUDmRpKQ", "sQGXqGcwOTc"], "start_seconds": ["38", "3"], "properties": ["applause, audience, give", "cling, speak, dishes"], "captions_pred_video": ["a black background with the moon and stars in the sky", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a group of people are clapping and cheering", "mechanisms are operating and water is splashing "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a jet engine roars, almost making a man inaudible", "a man speaks followed by another man speaking outside"], "sample_ids": ["xfaoyyzw2WU", "viuTg1M-dqg"], "start_seconds": ["180", "30"], "properties": ["loud, jet engine, roar", "two men, speak, follow"], "captions_pred_video": ["footage of an airplane on the tarmac at an airport", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["an aircraft engine roars and a man speaks ", "a man is speaking with background noise and breathing sounds "], "question": "which is not a pair of men speaking?", "label": 0}, {"captions": ["dog 
barking and vehicle engine idling followed shortly by vehicle engine revving", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["zY3icUyMdh8", "ziUT9IFTkjg"], "start_seconds": ["20", "10"], "properties": ["dog, bark, engine", "background, birds, rustling"], "captions_pred_video": ["footage of a bus driving through a residential street at night", null], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "birds are chirping and a chime is ringing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["vehicle engines accelerate as a man commentates on an intercom", "an airplane engine runs"], "sample_ids": ["sZPuqDgX2V0", "yVPZ2MNWpms"], "start_seconds": ["30", "0"], "properties": ["engine, accelerate, intercom", "engine, airplane, runs"], "captions_pred_video": [null, "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a car is driving by on the road "], "question": "which engine is running", "label": 0}, {"captions": ["loud clanking and banging with brief male speech", "a toilet flushes and a female speaks"], "sample_ids": ["sWZzXuWYY", "yaln9y8I7ms"], "start_seconds": ["420", "230"], "properties": ["male, speech, banging", "female, flushes, toilet"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a toilet flushes and a man speaks"], "question": "which entity is silent", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "wind blows and people scream while an engine revs"], "sample_ids": ["wqUmIEzuNz4", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["frog, bird, vocalize", "wind, engine, scream"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", null], "captions_pred_audio": ["a cat meows and rustles", "a motorboat is moving and people are 
shouting and cheering "], "question": "which entity is not a frog?", "label": 1}, {"captions": ["wind blows in gusts as a woman speaks in the distance", "multiple motorcycles pass by as a man speaks"], "sample_ids": ["uC9dtII1KDI", "zcDwZ6W7E3E"], "start_seconds": ["150", "180"], "properties": ["wind, gusts, distance", "man, speak, motorcycles"], "captions_pred_video": ["footage of a person riding a horse in a riding arena", "2 people riding motorcycles down a mountain road with trees lining the sides of the road"], "captions_pred_audio": ["a woman is speaking with wind noise and breathing in the background ", "a man is speaking while a car accelerates and revs its engine "], "question": "which entity is more likely to be in a city", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "a man speaks as a motor runs in the background"], "sample_ids": ["vKrYfzleLB8", "xZepNM9qcRA"], "start_seconds": ["110", "30"], "properties": ["a, ring, gunshots", "background, motor, run"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a man yell?", "label": 0}, {"captions": ["a train horn blows as it passes by", "a duck quacks continuously"], "sample_ids": ["zVacuqSb4LI", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["horn, blows, train", "quacks, continuously, duck"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "water flows as men speak and yell"], "sample_ids": ["zcDwZ6W7E3E", "vJ7JPEFhyLA"], "start_seconds": ["180", "16"], "properties": ["man, speak, motorcycles", "water, flow, men"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing as men speak and yell?", "label": 1}, {"captions": ["a car speeding up in the distance", "a woman speaks happily and an animal chirps"], "sample_ids": ["u0TrcHhkPQ", "uWAAAL4CIoc"], "start_seconds": ["20", "0"], "properties": ["distance, car, speed", "a woman, chirps, animal"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a woman is speaking and a dog is barking "], "question": "which entity is not moving", "label": 1}, {"captions": ["a duck quacks continuously", "a machine beeps continuously"], "sample_ids": ["vh30P49Po6s", "y682ml90jGw"], "start_seconds": ["30", "11"], "properties": ["quacks, continuously, duck", "beeps, machine, continuously"], "captions_pred_video": ["of a man 
brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "an engine runs loudly"], "sample_ids": ["v0x1odnXtP0", "vqZuVbG6-HI"], "start_seconds": ["210", "130"], "properties": ["keyboard, type, computer", "loud, engine, run"], "captions_pred_video": ["how to make money on youtube in spanish", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a person is typing on a keyboard", "a lawn mower is running and men are speaking "], "question": "which is quieter", "label": 0}, {"captions": ["a person is snoring while sleeping", "someone snores nearby"], "sample_ids": ["vJrjSeP17yE", "spJCm8tD9Zo"], "start_seconds": ["40", "90"], "properties": ["a person is sleeping, snoring, person", "someone snores, nearby, someone"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a person snoring loudly", "a person is snoring loudly"], "question": "which entity is a person", "label": 0}, {"captions": ["birds coo incessantly", "a horn blows as a train chugs along and warning bells ring"], "sample_ids": ["yZrFNS7GFBQ", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["coo, bird, incessant", "a train, a horn, a bell"], "captions_pred_video": ["of the bird in the cage", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["an owl hoots in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a warning", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["vs65y4qmyBE", "yajyRTUQk3U"], "start_seconds": ["340", "400"], "properties": ["engine, run, man", "a woman, something, 
fried"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["an electronic device bleeps once", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["tHJ6JSa8Y4", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["bleeps, electronic, device", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a clock is ticking and beeping", "a woman is speaking and a subway train is moving "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a kid speaks followed by music playing", "a stream of water runs briefly"], "sample_ids": ["tQWGZLItBXk", "x-PeY8Yb8M4"], "start_seconds": ["170", "300"], "properties": ["music, kid, speak", "stream, water, run"], "captions_pred_video": ["worms revolution screenshots", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["people speak in a closed space", "an insect buzzes around continuously"], "sample_ids": ["sTpirNYo8vQ", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["people, space, speak", "buzzes, continuously, insect"], "captions_pred_video": ["of a man taking a selfie on a bus", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", 
"vehicles pass by on a roadway"], "sample_ids": ["yZrFNS7GFBQ", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["pigeon, buzzes, insect", "pass, vehicle, roadway"], "captions_pred_video": ["of the bird in the cage", "footage of a fire truck entering a garage"], "captions_pred_audio": ["an owl hoots in the background ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["wnpJndXuxLc", "w5W5Kqtc8E"], "start_seconds": ["50", "100"], "properties": ["blows, vehicle, train", "wind, blow, vehicle"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a train blowing a horn?", "label": 0}, {"captions": ["wind blows and women speak as livestock vocalizes", "several insects fly while two men talk"], "sample_ids": ["vXlk0lIQBFo", "s-T9OVOiMLo"], "start_seconds": ["470", "330"], "properties": ["wind, speak, vocalize", "several, fly, men"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is about animals?", "label": 0}, {"captions": ["a toilet flushes and a female speaks", "people speak and tapping occurs"], "sample_ids": ["yaln9y8I7ms", "tFCUUGdREgA"], "start_seconds": ["230", "70"], "properties": ["female, flushes, toilet", "people, tap, speak"], "captions_pred_video": ["footage is blurry and out of focus", "a person riding a white horse in an indoor arena"], 
"captions_pred_audio": ["a toilet flushes and a man speaks", "a man is speaking and walking with wind noise in the background "], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["an engine idles consistently before sputtering some", "a stream of water flows as people talk and wind blows"], "sample_ids": ["rwTERCUno", "xBxDz0CFVn0"], "start_seconds": ["90", "30"], "properties": ["engine, idle, sputter", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking with wind noise in the background "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["zl9Dqx-j7q4", "y2bVZ7rz-5M"], "start_seconds": ["6", "280"], "properties": ["engine, laugh, loud", "motor noise, horn, siren"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a jet engine roars ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["wind noise makes sound into a microphone", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["w8uLijTqtlU", "y8WEcpOlT3I"], "start_seconds": ["70", "40"], "properties": ["wind, microphone, noise", "harsh, wind, blows"], "captions_pred_video": ["footage is blurry and shaky", "on how to use a sewing machine youtube"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking with wind noise in the background "], "question": "which entity is a recording of a harsh wind blowing?", "label": 1}, {"captions": ["male speech with light ticking", "several ducks quack and cocks crow far away"], "sample_ids": ["xO-Q2BlIIPU", 
"sNB8zxXneIM"], "start_seconds": ["30", "20"], "properties": ["male, speech, ticking", "several, quack, cocks"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a group of geese in a cage"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a rooster is crowing and wind is blowing "], "question": "which entity is a bird", "label": 1}, {"captions": ["heavy rain splashes as it falls", "wind blows as people chatter quietly"], "sample_ids": ["wP8ZKrlx3oA", "xBxDz0CFVn0"], "start_seconds": ["40", "30"], "properties": ["fall, rain, splash", "wind, chatter, people"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage is blurry and out of focus"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a music is played followed by a frog croaking and then music is played again", "someone whistles a tune"], "sample_ids": ["voJh2gJxXhA", "sIXTftIuUgw"], "start_seconds": ["50", "90"], "properties": ["music, frog, croak", "someone, tune, whistle"], "captions_pred_video": ["a frog on a black background with a red diamond in the center", null], "captions_pred_audio": ["music is playing and crickets are chirping ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a pigeon cooing as an insect buzzes by briefly", "paper is crumpling consistently"], "sample_ids": ["yZrFNS7GFBQ", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["pigeon, buzzes, insect", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of the bird in the cage", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["an owl hoots in the background ", "paper is crumpled and crinkled"], 
"question": "which entity is crumpling", "label": 1}, {"captions": ["an aircraft engine runs", "winds blows roughly as a vehicle races past"], "sample_ids": ["yLCORCnd35Q", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["engine, aircraft, runs", "wind, blows, vehicle"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["a vehicle is skidding and squealing tires", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["soTOh3zYJfY", "tdWhHV3X25Q"], "start_seconds": ["40", "60"], "properties": ["vehicle, skid, tires", "applause, audience, yells"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["uWAAAL4CIoc", "uEU-Hg5MTN8"], "start_seconds": ["0", "27"], "properties": ["a woman, chirps, animal", "a woman, laughs, animal"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking happily and an animal chirps?", "label": 0}, {"captions": ["a man speaks as he moves silverware in a bowl", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["x6ijhqRY38s", "uEU-Hg5MTN8"], "start_seconds": ["250", "27"], "properties": ["bowl, silverware, 
man", "animal, grunts, snorts"], "captions_pred_video": ["a chef preparing a dish with a bottle of wine and a plate of food on a table", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and dishes are clanging ", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a horn honks and then loudly blares"], "sample_ids": ["sG7TyPnFDR0", "wnpJndXuxLc"], "start_seconds": ["180", "50"], "properties": ["beeps, machine, smoke alarm", "horn, honk, loud"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["a sheep baa followed by birds chirping and then more sheep baaing", "an infant crying as a woman laughs"], "sample_ids": ["vlS6YMeWAPo", "xhmRY9yhC7c"], "start_seconds": ["40", "20"], "properties": ["sheep, baa, birds", "a, laugh, infant"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a goat bleats and birds chirp", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["wyllXV6PjKo", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["a kid, talk, cry", "harsh, wind, blows"], "captions_pred_video": [null, "on how to use a sewing machine youtube"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking with wind noise in the background "], "question": "which entity has a harsh wind blowing?", 
"label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "a person speaks over rustling leaves"], "sample_ids": ["uYT5gxnyMWM", "zOZleIRqZm4"], "start_seconds": ["50", "80"], "properties": ["person, spray, yell", "rustling, leaves, person"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a person picking berries from the bushes in the garden"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking with crickets chirping in the background"], "question": "which entity has a person speaking over a background of rustling leaves?", "label": 1}, {"captions": ["water splashes as an animal walks through", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["w1ir-sZ3Im8", "zl9Dqx-j7q4"], "start_seconds": ["90", "6"], "properties": ["animal, water, splashes", "engine, laugh, loud"], "captions_pred_video": ["footage of a group of people riding horses through a river", "footage of a man driving a car in the dark"], "captions_pred_audio": ["water splashes and gurgles as people speak", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["ul60S8TXDA8", "uYT5gxnyMWM"], "start_seconds": ["60", "50"], "properties": ["sound, distance, bell", "female, spraying, scream"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "multiple people speak and children yell while water gurgles"], "sample_ids": ["vBslzh7saPw", "vb1fPSDI4c"], "start_seconds": ["90", "30"], 
"properties": ["power, scream, increase", "multiple, people, yell"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a crowd of people are talking and laughing"], "question": "which entity is quieter", "label": 1}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["uYT5gxnyMWM", "sLUnaPT5gM8"], "start_seconds": ["50", "0"], "properties": ["female, spraying, scream", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more calming", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["y8WEcpOlT3I", "uYT5gxnyMWM"], "start_seconds": ["40", "50"], "properties": ["wind, speak, buffeting", "a, scream, girl"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a woman is speaking and a baby is crying"], "question": "which entity is a recording of a girl speaking?", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["zofjfKhqLk8", "xKB8O8LTs6s"], "start_seconds": ["10", "70"], "properties": ["noise, stop, motor", "music, gunfire, explosion"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a large engine is running and a 
bell is ringing", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "a clock ticktocks"], "sample_ids": ["sQwlkXjQabo", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["liquid, surface, spray", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["spraying followed by silence", "a clock is ticking loudly"], "question": "which entity is not a clock?", "label": 0}, {"captions": ["an engine runs loudly", "a telephone rings followed by a woman talking"], "sample_ids": ["vqZuVbG6-HI", "tGcFnX0GHI"], "start_seconds": ["130", "0"], "properties": ["loud, engine, run", "ring, talk, woman"], "captions_pred_video": ["footage is blurry because it's raining outside", null], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a dial tone sounds followed by a woman speaking"], "question": "which is quieter", "label": 1}, {"captions": ["water running down a sink while a man is talking", "a machine beeps continuously"], "sample_ids": ["vSeGhaZt-aI", "y682ml90jGw"], "start_seconds": ["50", "11"], "properties": ["water, sink, talk", "beeps, machine, continuously"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a beeping sound is being made "], "question": "which entity is silent", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "people speak as gunfire rings out"], "sample_ids": ["zY3icUyMdh8", "wqTCwqVRDlk"], "start_seconds": ["20", "80"], "properties": ["dog, bark, engine", "gunfire, ring, speak"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "of 
a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["zgUgkpk78xU", "xfaoyyzw2WU"], "start_seconds": ["70", "180"], "properties": ["clinking, humming, horn", "loud, jet engine, roar"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "several insects fly while two men talk"], "sample_ids": ["vz8868znkVQ", "s-T9OVOiMLo"], "start_seconds": ["60", "330"], "properties": ["audio, click, kid speaking", "several, fly, men"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a baby is laughing and breathing with background noise ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video", "label": 1}, {"captions": ["a woman and man are speaking", "popping and crackling repeats as men yell and laugh"], "sample_ids": ["vbpKkWvfOu4", "rqu8iB22IY"], "start_seconds": ["560", "5"], "properties": ["two people, speaking, woman, man", "sound, repeats, laugh"], "captions_pred_video": 
["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", null], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a dog barks and a man speaks while music plays "], "question": "which entity has more people speaking", "label": 0}, {"captions": ["a man is snoring loudly and repeatedly", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sncRqQ67iJU", "tdWhHV3X25Q"], "start_seconds": ["460", "60"], "properties": ["loud, repeatedly, man", "applause, audience, yells"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a person is snoring", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a infant makes noise and is excited"], "sample_ids": ["sfAvvZwdLCY", "wIJK3-5y0kA"], "start_seconds": ["20", "30"], "properties": ["flushes, drains, water", "noise, excited, infant"], "captions_pred_video": ["footage of the toilet in the bathroom", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a toilet is flushed", "a baby cries and a woman speaks"], "question": "which entity is quieter", "label": 0}, {"captions": ["someone snores nearby", "water pouring and bubbling"], "sample_ids": ["spJCm8tD9Zo", "uyRfq-jKPpo"], "start_seconds": ["90", "50"], "properties": ["someone snores, nearby, someone", "water, bubbles, pouring"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an 
afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a person is snoring loudly", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a car speeding up in the distance", "some men converse over an engine running"], "sample_ids": ["u0TrcHhkPQ", "sCiy7QS1U"], "start_seconds": ["20", "300"], "properties": ["distance, car, speed", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man speaks while a motorcycle revs and accelerates "], "question": "which is a static image", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "people speak as gunfire rings out"], "sample_ids": ["sWZzXuWYY", "wqTCwqVRDlk"], "start_seconds": ["420", "80"], "properties": ["male, speech, banging", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "a clock ticktocks"], "sample_ids": ["soTOh3zYJfY", "v-g-j2uTByM"], "start_seconds": ["40", "30"], "properties": ["vehicle, skid, tires", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "in your own words a cuckoo clock hanging on 
the wall"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["people speak as gunfire rings out", "dogs bark as an engine runs and a person whistles"], "sample_ids": ["wqTCwqVRDlk", "zY3icUyMdh8"], "start_seconds": ["80", "20"], "properties": ["gunfire, ring, speak", "dog, bark, engine"], "captions_pred_video": ["of a woman shooting a gun at a target on the beach", "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a man is speaking and a gun is fired", "a car is driving and dogs are barking and squealing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person is whistling a tune", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["scYRUkrFLiQ", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["a, tune, whistle", "animal, grunts, snorts"], "captions_pred_video": ["of the man wearing a bow tie and a suit jacket in front of a red door", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person whistling a song", "a woman is speaking and a baby is crying"], "question": "which entity is not a person?", "label": 1}, {"captions": ["water rushes by", "a woman speaks and other women and a man talk with her"], "sample_ids": ["x-PeY8Yb8M4", "vbpKkWvfOu4"], "start_seconds": ["300", "560"], "properties": ["water, rushes, by", "a, woman, man"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a car is driving on a wet road ", "a woman is speaking and a man is speaking"], "question": "which entity is a group of people", "label": 
1}, {"captions": ["after a few seconds of silence, a loud bang occurs followed by a softer banging noise", "a man speaks followed by another man speaking outside"], "sample_ids": ["zkKdxzNC97Y", "viuTg1M-dqg"], "start_seconds": ["27", "30"], "properties": ["loud, bang, noise", "two men, speak, follow"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a loud bang followed by a softer banging noise?", "label": 0}, {"captions": ["a dark barks and whimpers", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sYj4hpDUZDQ", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["barks, whimpers, dark", "male, duck, laugh"], "captions_pred_video": ["a brown and white dog standing in front of a wall with its mouth open", null], "captions_pred_audio": ["a dog barks and a cat meows", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a man speaks as a car is passing by"], "sample_ids": ["vW4x7S1VfQc", "sK4u5T8hW78"], "start_seconds": ["150", "30"], "properties": ["clacking, oil, woman", "a, car, pass"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["food sizzles in a frying pan", "a man is speaking 
with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a horse runs while two women talk", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sdvI1mHAsc", "tdWhHV3X25Q"], "start_seconds": ["20", "60"], "properties": ["two women, horse, run", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["someone is snoring while sleeping", "a stream of water flows as people talk and wind blows"], "sample_ids": ["ujMt0-D-x2k", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["snore, sleep, someone", "stream, water, flow"], "captions_pred_video": ["of the dog playing with a toy on the floor", "footage is blurry and out of focus"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sEprKHm8Sj8", "uZesmtKZGSw"], "start_seconds": ["90", "250"], "properties": ["car, tires, slows", "men, talk, cars"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 
chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking and a car is revving with laughter in the background "], "question": "which car is going faster", "label": 1}, {"captions": ["a infant makes noise and is excited", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wIJK3-5y0kA", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["noise, excited, infant", "applause, audience, yells"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a train horn sounds as a railroad passing bell rings", "a motorcycle engine is revving while people are speaking"], "sample_ids": ["zgUgkpk78xU", "y8dSeubCNI"], "start_seconds": ["70", "4"], "properties": ["horn, bell, train", "engine revving, people speaking, motorcycle"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "an engine revving and people talking in the background"], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "wind blows as people chatter quietly"], 
"sample_ids": ["tiDFTC-5vU", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["male, duck, laugh", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a frog vocalizes as birds chirp", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["wqUmIEzuNz4", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["frog, bird, vocalize", "a woman, a television program, a bird"], "captions_pred_video": ["a frog sitting in the grass on a sunny day", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a cat meows and rustles", "a woman is speaking and a dog is whimpering"], "question": "which entity is a frog", "label": 0}, {"captions": ["someone snores nearby", "a child speaks in closed space"], "sample_ids": ["spJCm8tD9Zo", "yW6FWLSLkx4"], "start_seconds": ["90", "40"], "properties": ["someone snores, nearby, someone", "child, space, speak"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "a toilet flushes and water drains"], "sample_ids": ["wvKpEYswXO0", "sfAvvZwdLCY"], "start_seconds": ["150", "20"], "properties": ["plastic, tap, speak", "water drains, flushes, water"], "captions_pred_video": ["of the person preparing food in the kitchen", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a toilet is flushed"], 
"question": "which entity is a source of water", "label": 1}, {"captions": ["a vehicle accelerates and squeals tires", "an electric engine works nearby followed by a child talking"], "sample_ids": ["yRx9txMcBl0", "xSKJGCItUWE"], "start_seconds": ["40", "10"], "properties": ["accelerates, tires, squeals", "engine, work, child"], "captions_pred_video": ["in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta", "footage of the helicopter flying in the room"], "captions_pred_audio": ["a car is revving its engine and skidding ", "a high pitched engine is running and a child speaks"], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["white noise and snoring with some rustling in the background", "people cheer as a vehicle engine revs"], "sample_ids": ["xzKKf9bKNUo", "xjhAnI2q6hM"], "start_seconds": ["10", "6"], "properties": ["background, noise, snoring", "engine revs, vehicle, people"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a person snoring loudly", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["females talk and laugh over gusting wind", "paper is crumpling consistently"], "sample_ids": ["un9VQlzgZM", "v5cSxLaHADY"], "start_seconds": ["5", "0"], "properties": ["females, talk, laugh", "paper is crumpling, paper 
is white, paper is crumpling"], "captions_pred_video": [null, "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a woman is speaking and laughing with wind noise and breathing in the background ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a person is whistling", "a stream of water runs briefly"], "sample_ids": ["sIXTftIuUgw", "x-PeY8Yb8M4"], "start_seconds": ["90", "300"], "properties": ["person, whistling, person", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a person whistling a song", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks while water drains", "vehicles pass by on a roadway"], "sample_ids": ["vSeGhaZt-aI", "tgbONvsP47Y"], "start_seconds": ["50", "0"], "properties": ["water, drain, man", "pass, vehicle, roadway"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["water splashes and wind noise is made into a microphone", "a propeller rotates loudly and intensely"], "sample_ids": ["sDSppXIlJrs", "ugHJF0hfYkg"], "start_seconds": ["27", "10"], "properties": ["microphone, water, wind", "loud, intense, propeller"], "captions_pred_video": ["a man is paddling a small wooden boat in the water", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["the wind is blowing and water is splashing", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "water flows as men speak and yell"], "sample_ids": ["uWAAAL4CIoc", "vJ7JPEFhyLA"], "start_seconds": ["0", "16"], 
"properties": ["a woman, chirps, animal", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["loud intermittent buzzing with intermittent laughter", "water pouring and bubbling"], "sample_ids": ["sLUnaPT5gM8", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["loud, laughter, intermittent", "water, bubbles, pouring"], "captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "water is running from a faucet"], "question": "which entity is more quiet", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "winds blows roughly as a vehicle races past"], "sample_ids": ["vZAw4apG0Es", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["background, clock, ticktocks", "wind, blows, vehicle"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a dhl plane landing on the runway"], 
"captions_pred_audio": ["a clock is ticking and people are talking", "a jet engine roars and wind blows "], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a helicopter engine idles continuously"], "sample_ids": ["su6FAOcOA8c", "ugHJF0hfYkg"], "start_seconds": ["4", "10"], "properties": ["engine, run, woman", "engine, idle, continuously"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a helicopter is flying overhead "], "question": "which entity has a running engine", "label": 0}, {"captions": ["water is sprayed across a hard surface", "an aircraft engine runs"], "sample_ids": ["sQwlkXjQabo", "yLCORCnd35Q"], "start_seconds": ["10", "0"], "properties": ["water, spray, surface", "engine, aircraft, runs"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a lufthansa airbus a380 landing at london's heathrow airport"], "captions_pred_audio": ["spraying followed by silence", "a train is moving and its wheels are squealing "], "question": "which entity is moving", "label": 1}, {"captions": ["several insects fly while two men talk", "pigeons vocalize and birds chirp"], "sample_ids": ["s-T9OVOiMLo", "uiS58TNyUiw"], "start_seconds": ["330", "430"], "properties": ["several, fly, men", "vocalize, bird, chirp"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a man speaks, then dials a rotary telephone", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tK4VlLsNxak", 
"zj2R0XoFr5k"], "start_seconds": ["120", "50"], "properties": ["a, dial, telephone", "airplane, boy, fly"], "captions_pred_video": ["person is wearing a headset and holding a remote control in his hand", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and using a sewing machine", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a boy speaking?", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["w5W5Kqtc8E", "zj2R0XoFr5k"], "start_seconds": ["100", "50"], "properties": ["water, splashes, motorboat", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tQWGZLItBXk", "zj2R0XoFr5k"], "start_seconds": ["170", "50"], "properties": ["music, person, ding", "airplane, boy, fly"], "captions_pred_video": ["worms revolution screenshots", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["small dogs yip and bark sharply", "a motor slows to a stopover traffic noises"], "sample_ids": ["v-wcQf4BDY0", "zofjfKhqLk8"], "start_seconds": ["120", "10"], "properties": ["bark, yip, sharply", "noise, stop, 
motor"], "captions_pred_video": ["footage is blurry and shaky, making it difficult to see what is happening", "footage of a man using a machine to cut a piece of wood"], "captions_pred_audio": ["a dog barks and growls", "a large engine is running and a bell is ringing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a train horn blows as it passes by", "a man speaks followed by another man speaking outside"], "sample_ids": ["zVacuqSb4LI", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["horn, blows, train", "two men, speak, follow"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a single person speaking?", "label": 0}, {"captions": ["women speak and laugh as wind blows", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["un9VQlzgZM", "yajyRTUQk3U"], "start_seconds": ["5", "400"], "properties": ["wind, speak, laugh", "a woman, something, fried"], "captions_pred_video": [null, "- a woman cooking in the kitchen"], "captions_pred_audio": ["a 
woman is speaking and laughing with wind noise and breathing in the background ", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["an small aircraft engine runs and a boy speaks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["xSKJGCItUWE", "w5W5Kqtc8E"], "start_seconds": ["10", "100"], "properties": ["engine, run, boy", "wind, blow, vehicle"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a boy speaking?", "label": 0}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "someone is typing on a computer keyboard"], "sample_ids": ["sofxkNWaP0s", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["wind, engine, louder", "keyboard, type, computer"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a person is typing on a keyboard"], "question": "which is a type of computer", "label": 1}, {"captions": ["a man speaks on a radio as wind blows", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["tDVADusiIoc", "sLUnaPT5gM8"], "start_seconds": ["60", "0"], "properties": ["man, radio, blows", "loud, laughter, intermittent"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["wind blows 
and a stream of water flows nearby", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sYITalLZjj4", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["stream, flow, wind", "male, duck, laugh"], "captions_pred_video": ["two ducks are swimming in the water near each other", null], "captions_pred_audio": ["wind blows and birds chirp", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["vh30P49Po6s", "ukg5L09Wpvo"], "start_seconds": ["30", "150"], "properties": ["loud, continuous, quacks", "clickety-clack, train, whistle"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a duck is quacking loudly", "a train blows its whistle and blows its horn "], "question": "which entity is quieter", "label": 1}, {"captions": ["a woman speaks followed by another woman whimpering and speaking", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["xOZfdgAgJ9o", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["woman, whimpering, speaking", "animal, grunts, snorts"], "captions_pred_video": ["footage of a woman talking to a man in a doctor's office", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a baby is crying"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["continuous chugging with birds chirping in the background", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["xM4joTqDVp4", "zFjIWfSD-4"], "start_seconds": ["160", "410"], "properties": ["background, chirp, birds", "People, motor, brakes"], 
"captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", null], "captions_pred_audio": ["birds are chirping and a train is moving ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running and air brakes hissing?", "label": 1}, {"captions": ["a toilet flushes and a female speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yaln9y8I7ms", "wz7N8YRy74I"], "start_seconds": ["230", "30"], "properties": ["female, flushes, toilet", "rooster, crow, background, men"], "captions_pred_video": ["footage is blurry and out of focus", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a toilet flushes and a man speaks", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster?", "label": 1}, {"captions": ["a saw finishes running as metal clings in the background", "a train horn blows as it passes by"], "sample_ids": ["zofjfKhqLk8", "zVacuqSb4LI"], "start_seconds": ["10", "30"], "properties": ["background, metal, clings", "horn, blows, train"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by 
mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train?", "label": 1}, {"captions": ["a child yells and another yells", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vMDHu7Lxcgw", "uZesmtKZGSw"], "start_seconds": ["410", "250"], "properties": ["two, yell, child", "men, talk, cars"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has more cars", "label": 1}, {"captions": ["an electric engine works nearby followed by a child talking", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["xSKJGCItUWE", "wqZ135Ssz0"], "start_seconds": ["10", "60"], "properties": ["engine, work, child", "two men, woman, birds"], "captions_pred_video": ["footage of the helicopter flying in the room", null], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["some tunes played by whistling", "some men talk among st themselves as cars speed and 
race loudly"], "sample_ids": ["u6BnG6YZqJ4", "uZesmtKZGSw"], "start_seconds": ["0", "250"], "properties": ["tune, play, whistling", "men, talk, cars"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a person whistling a song", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a infant makes noise and is excited", "people applaud and hoot and chat quietly"], "sample_ids": ["wIJK3-5y0kA", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["noise, excited, infant", "people, applaud, hoot"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a frog croaks as other frogs croak in the background", "a child speaks in closed space"], "sample_ids": ["yswmmRZFItk", "yW6FWLSLkx4"], "start_seconds": ["0", "40"], "properties": ["background, frog, croak", "child, space, speak"], "captions_pred_video": ["a close up of a frog in the water", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a frog is croaking", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is 
speaking", "label": 1}, {"captions": ["someone is burping continuously", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["y636gklDioE", "zl9Dqx-j7q4"], "start_seconds": ["20", "6"], "properties": ["burps, burps, burps", "engine, laugh, loud"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a person burps loudly several times", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a helicopter engine idles continuously", "waves crash against a shoreline and people speak"], "sample_ids": ["ugHJF0hfYkg", "yFB25fqfU8I"], "start_seconds": ["10", "300"], "properties": ["engine, idle, continuously", "wave, crash, shoreline"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a helicopter is flying overhead ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["several ducks are quacking and squawking", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wfHeoPDLMaM", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["quacking, squawking, ducks", "female, spraying, scream"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the 
middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["ducks are quacking", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["u--KhUW8l1Y", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["horn, siren, life", "applause, audience, yells"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a person snores loudly multiple times at a close distance"], "sample_ids": ["vBHyYJ8pL0", "sSMl2vc3ek"], "start_seconds": ["2", "20"], "properties": ["noise, door, opening", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["mechanisms are ticking and a sliding door is opening and closing ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["an emergency siren wails as it passes", "a heavy rain falls endlessly"], "sample_ids": ["vGj1XLJvNrw", "wP8ZKrlx3oA"], "start_seconds": ["0", "40"], "properties": ["wails, wails, pass", "heavy, rain, fall"], "captions_pred_video": ["footage of a police car driving down a city street", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["an emergency vehicle with a siren is 
passing by", "a heavy rain is falling on a surface"], "question": "which entity is falling", "label": 1}, {"captions": ["an engine revs and a turning noise is made", "pigeons vocalize and birds chirp"], "sample_ids": ["tOSWIURC-4", "uiS58TNyUiw"], "start_seconds": ["0", "430"], "properties": ["noise, engine, revs", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a lawn mower is running ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["some men talk among st themselves as cars speed and race loudly", "a stream of water flows as people talk and wind blows"], "sample_ids": ["uZesmtKZGSw", "xBxDz0CFVn0"], "start_seconds": ["250", "30"], "properties": ["men, talk, cars", "stream, water, flow"], "captions_pred_video": ["1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["a woman speaks as she rubs two objects together", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vzxHnu-SFEw", "vbZ-0lGPneg"], "start_seconds": ["80", "30"], "properties": ["two objects, woman, speak", "a woman, a television program, a bird"], "captions_pred_video": ["how to make a 
paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a woman is speaking and a dog is whimpering"], "question": "which woman is speaking", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a stream of water runs briefly"], "sample_ids": ["sZPuqDgX2V0", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["commentator, race, track", "stream, water, run"], "captions_pred_video": [null, "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "a child speaks in closed space"], "sample_ids": ["uYT5gxnyMWM", "yW6FWLSLkx4"], "start_seconds": ["50", "40"], "properties": ["person, spray, yell", "child, space, speak"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of the doll sitting on the bed with her 
hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a person is burping then speaks and laughs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["wAAkbZToh8", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["burp, laugh, speak", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man burps and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 0}, {"captions": ["the wind blows while a vehicle engine runs", "a clock ticktocks"], "sample_ids": ["xyL9F5VrjkE", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["wind, blows, vehicle", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an animal quacks rapidly", "water is sprayed across a hard surface"], "sample_ids": ["vh30P49Po6s", "sQwlkXjQabo"], "start_seconds": ["30", "10"], "properties": ["animal, quacks, rapidly", "water, spray, surface"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a duck is quacking loudly", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "pigeons vocalize and birds chirp"], "sample_ids": ["y2bVZ7rz-5M", "uiS58TNyUiw"], "start_seconds": ["280", "430"], "properties": 
["engine, horn, siren", "vocalize, bird, chirp"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "of the pigeon in the cage"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a vehicle?", "label": 1}, {"captions": ["a man speaks over intermittent keyboard taps", "paper is crumpling consistently"], "sample_ids": ["tw76HGONaKg", "v5cSxLaHADY"], "start_seconds": ["570", "0"], "properties": ["audio, man, keyboard", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "paper is crumpled and crinkled"], "question": "which entity is a video", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["uEU-Hg5MTN8", "xfaoyyzw2WU"], "start_seconds": ["27", "180"], "properties": ["animal, grunts, snorts", "loud, jet engine, roar"], "captions_pred_video": ["of a girl wearing a 
pig mask and a boy hugging her", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows and women speak as livestock vocalizes", "a baby laughs giddily and a woman laughs then speaks"], "sample_ids": ["vXlk0lIQBFo", "wjsXBsc7M40"], "start_seconds": ["470", "10"], "properties": ["wind, speak, vocalize", "a baby laughs, a woman laughs, a woman speaks"], "captions_pred_video": ["- a woman and two donkeys in a fenced in area", "footage of the baby playing with a toothbrush"], "captions_pred_audio": ["wind chimes are ringing and people are speaking and laughing ", "a baby laughs and a woman speaks"], "question": "which entity is about a baby and a woman?", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a car speeding up in the distance"], "sample_ids": ["vf9xf3vMsGM", "u0TrcHhkPQ"], "start_seconds": ["540", "20"], "properties": ["A man speaks while turning a water faucet on.", "distance, car, speed"], "captions_pred_video": ["of the person washing their hands under the faucet", null], "captions_pred_audio": ["a man is speaking while water is running in the background", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["water flows followed by women screaming", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["w5W5Kqtc8E", "tDVADusiIoc"], "start_seconds": ["100", "60"], "properties": ["water, flow, women", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows water flowing?", "label": 0}, {"captions": ["an emergency 
siren wails as it passes", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vGj1XLJvNrw", "yDoT73BWsdA"], "start_seconds": ["0", "10"], "properties": ["wails, wails, pass", "engine, revs, vehicle"], "captions_pred_video": ["footage of a police car driving down a city street", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["food fries in a pan as someone talks and cooks", "a speedboat passes quickly on the water"], "sample_ids": ["ukxt9I7eMMg", "tjmoSi330GM"], "start_seconds": ["30", "23"], "properties": ["food, pan, cook", "speed, water, boat"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a person riding a jet ski on a lake with trees in the background"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a motorboat speeds through water with wind noise "], "question": "which is moving faster", "label": 0}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["wqZ135Ssz0", "y2bVZ7rz-5M"], "start_seconds": ["60", "280"], "properties": ["man, woman, squawks", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is accompanied by a horn", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["w9lpbUn0hPc", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["male, wind, rustling", 
"beeps, hit, woman"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["continuous snoring", "waves crash against a shoreline and people speak"], "sample_ids": ["sLkeqCDJIyw", "yFB25fqfU8I"], "start_seconds": ["120", "300"], "properties": ["loud, snoring, noise", "wave, crash, shoreline"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is louder", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "insects humming with a dog barking and small goat bleating"], "sample_ids": ["uJV8NDaHqqk", "tIY7qOV3rEM"], "start_seconds": ["100", "0"], "properties": ["loud, fly, chirp", "animal, bark, dog, barking, small, goat, bleating"], "captions_pred_video": ["a bee hive in a wooden box", "a dog is standing in the middle of a dirt road in the woods"], "captions_pred_audio": ["a swarm of bees buzzing around", "a dog is barking and a cat is meowing"], "question": "which animal is barking", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "a man speaks over intermittent keyboard taps"], "sample_ids": ["sYITalLZjj4", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["water, rushes, background, birds", "audio, man, keyboard"], "captions_pred_video": ["two ducks are swimming in the water near each other", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": 
["wind blows and birds chirp", "a man speaks and types on a computer keyboard "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "wind blows as people chatter quietly"], "sample_ids": ["s4Uz1Ffgo04", "xBxDz0CFVn0"], "start_seconds": ["100", "30"], "properties": ["roars, background, people speaking", "wind, chatter, people"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["food is frying then a woman speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["ukxt9I7eMMg", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["food, woman, speak", "men, talk, cars"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about cars?", "label": 1}, {"captions": ["an insect buzzes around continuously", "people speak as gunfire rings out"], "sample_ids": ["v25l1jef3JY", "wqTCwqVRDlk"], 
"start_seconds": ["0", "80"], "properties": ["buzzes, continuously, insect", "gunfire, ring, speak"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["waves of water rumble", "a car accelerates and wind blows"], "sample_ids": ["vwqaIHKxLvM", "u0TrcHhkPQ"], "start_seconds": ["20", "20"], "properties": ["sound, wave, water", "accelerates, wind, blows"], "captions_pred_video": ["of a surfer riding a big wave in the ocean", null], "captions_pred_audio": ["waves crash and wind blows ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["an engine runs and wind blows", "a child speaks in closed space"], "sample_ids": ["vs65y4qmyBE", "yW6FWLSLkx4"], "start_seconds": ["340", "40"], "properties": ["engine, run, wind", "child, space, speak"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "water splashes as an animal walks through"], "sample_ids": ["x9JovgqUcs", "w1ir-sZ3Im8"], "start_seconds": ["500", "90"], "properties": ["a, man, speaks, keyboard", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man speaks and types on a keyboard", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["birds chirp and an 
owl hoots before a man speaks briefly", "someone is typing on a computer keyboard"], "sample_ids": ["wRBHTgrbiwg", "v0x1odnXtP0"], "start_seconds": ["50", "210"], "properties": ["bird, owl, speak", "keyboard, type, computer"], "captions_pred_video": ["of a bee pollinating the flowers in the field", "how to make money on youtube in spanish"], "captions_pred_audio": ["birds are chirping and insects are buzzing", "a person is typing on a keyboard"], "question": "which entity is a person", "label": 1}, {"captions": ["a person is burping then speaks and laughs", "someone is typing on a computer keyboard"], "sample_ids": ["wAAkbZToh8", "v0x1odnXtP0"], "start_seconds": ["0", "210"], "properties": ["burp, laugh, speak", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a man burps and a woman speaks", "a person is typing on a keyboard"], "question": "which person is typing on a computer keyboard", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "three men talk while wind blows and some liquid flows"], "sample_ids": ["vz8868znkVQ", "vJ7JPEFhyLA"], "start_seconds": ["60", "16"], "properties": ["audio, click, kid speaking", "three men, wind, flow"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a baby is laughing and breathing with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video", "label": 1}, {"captions": ["water splashes as an animal walks through", "water pouring and bubbling"], "sample_ids": ["w1ir-sZ3Im8", "uyRfq-jKPpo"], "start_seconds": ["90", "50"], "properties": ["animal, water, splashes", "water, bubbles, pouring"], "captions_pred_video": ["footage of a group of people riding horses through a river", "on youtube how to do an afro puff hairstyle youtube how to do 
an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["water splashes and gurgles as people speak", "water is running from a faucet"], "question": "which entity is more calm", "label": 1}, {"captions": ["an airplane engine runs", "a person screams glaringly"], "sample_ids": ["yVPZ2MNWpms", "xC8kbrKJmco"], "start_seconds": ["0", "0"], "properties": ["engine, airplane, runs", "glaringly, screams, person"], "captions_pred_video": ["footage of an airport with planes parked on the tarmac", null], "captions_pred_audio": ["a car is driving by on the road ", "a goat is bleating "], "question": "which entity is louder", "label": 1}, {"captions": ["a man laughs and speaks as cats purr and hiss", "a propeller rotates loudly and intensely"], "sample_ids": ["vVhthZ45k3Y", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["cat, purr, hiss", "loud, intense, propeller"], "captions_pred_video": ["footage is blurry and out of focus", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sjlVMgdGSK0", "tdWhHV3X25Q"], 
"start_seconds": ["30", "60"], "properties": ["accelerates, vehicle, race car", "applause, audience, yells"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "people applaud and hoot and chat quietly"], "sample_ids": ["uEU-Hg5MTN8", "wwyfGO2J4"], "start_seconds": ["27", "90"], "properties": ["animal, grunts, snorts", "people, applaud, hoot"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as horns blow", "dog barking and vehicle engine idling followed shortly by vehicle engine revving"], "sample_ids": ["tHyNqRyK34A", "zY3icUyMdh8"], "start_seconds": ["24", "20"], "properties": ["a, man, speaks", "dog, bark, engine"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "footage of a bus driving through a residential street at night"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a car is driving and dogs are barking and squealing "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an engine starts and increases in power", "a woman speaks and other women and a man talk with her"], "sample_ids": ["zjTG0gaGCUI", "vbpKkWvfOu4"], "start_seconds": ["80", "560"], "properties": ["power, increase, engine", "a, woman, man"], "captions_pred_video": [null, "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a jet engine roars as wind blows ", "a woman is speaking and a man is speaking"], "question": "which entity is a group of people", "label": 1}, {"captions": ["a child babbles as a woman speaks", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["wEBlkGWVWwE", "uZesmtKZGSw"], "start_seconds": ["260", "250"], "properties": ["a, babble, woman", "men, talk, cars"], "captions_pred_video": ["shows a person writing on the whiteboard", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a man speaks as water trickles down a stream", "a duck quacks continuously"], "sample_ids": ["sapQIQUhFc", "vh30P49Po6s"], "start_seconds": ["280", "30"], "properties": ["water, stream, trickles", "quacks, continuously, duck"], "captions_pred_video": [null, "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "someone whistles a tune"], "sample_ids": 
["vh30P49Po6s", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["loud, continuous, quacks", "someone, tune, whistle"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a person whistling a song"], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a small musical boom and then birds tweet and a few dogs pant"], "sample_ids": ["sK4u5T8hW78", "y2ZBGpgbhHM"], "start_seconds": ["30", "30"], "properties": ["a, car, pass", "birds, tweet, pant"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "birds chirping and a dog panting"], "question": "which entity has more animals", "label": 1}, {"captions": ["male speech followed by light wind, rustling, distant speech and brief hissing", "frogs croak and vocalize"], "sample_ids": ["w9lpbUn0hPc", "yswmmRZFItk"], "start_seconds": ["30", "0"], "properties": ["male, wind, rustling", "croak, vocalize, frog"], "captions_pred_video": ["footage of a man in a black shirt standing in front of a white truck in a parking lot", "a close up of a frog in the water"], "captions_pred_audio": ["a man is speaking with wind noise and breathing sounds in the background ", "a frog is croaking"], "question": "which entity is a frog?", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "multiple people speak and children yell while water gurgles"], "sample_ids": ["uJV8NDaHqqk", 
"vb1fPSDI4c"], "start_seconds": ["100", "30"], "properties": ["loud, fly, chirp", "multiple, people, yell"], "captions_pred_video": ["a bee hive in a wooden box", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a crowd of people are talking and laughing"], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as horns blow", "vehicles pass by on a roadway"], "sample_ids": ["tHyNqRyK34A", "tgbONvsP47Y"], "start_seconds": ["24", "0"], "properties": ["a, man, speaks", "pass, vehicle, roadway"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "a car is driving on the road "], "question": "which entity is more active", "label": 1}, {"captions": ["heavy rain splashes as it falls", "a motor idles, accelerates, then slows down."], "sample_ids": ["wP8ZKrlx3oA", "vYkA3cfXp5Q"], "start_seconds": ["40", "30"], "properties": ["fall, rain, splash", "speed, idle, accelerate"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a heavy rain is falling on a surface", "an engine is idling"], "question": "which entity is not a motor?", "label": 0}, {"captions": ["a horn honks twice and keys jingle, followed by a slam and an electronic beep", "a vehicle engine accelerating then running on idle"], "sample_ids": ["wSVhSdj0F0", "vYkA3cfXp5Q"], "start_seconds": ["10", "30"], "properties": ["horn honks, keys jingle, slam", "engine, accelerate, idle"], "captions_pred_video": [null, "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a person burps 
loudly for a long time nearby", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vf44CgrjT0A", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["loud, long, person", "female, spraying, scream"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a loud burp", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 0}, {"captions": ["a dog barks and whimpers", "an airplane flies overhead as a woman speaks"], "sample_ids": ["sShpyu2l4YQ", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["barks, whimpers, dog", "airplane, fly, overhead"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a dog is barking and growling", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying overhead", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "people applaud and hoot and chat quietly"], "sample_ids": ["tEE3MpBt1sg", "wwyfGO2J4"], "start_seconds": ["50", "90"], "properties": ["drill, something, laugh", "people, applaud, hoot"], "captions_pred_video": ["footage is blurry due to the smoke in the air", null], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "people are clapping and speaking with background noise "], "question": "which entity is more likely to be a performance", "label": 1}, {"captions": ["an insect buzzes around continuously", "an engine runs loudly"], "sample_ids": ["v25l1jef3JY", "vqZuVbG6-HI"], "start_seconds": ["0", "130"], "properties": ["buzzes, continuously, insect", "loud, engine, run"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "footage is blurry 
because it's raining outside"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["birds chirp as a bell rings", "a car accelerates and wind blows"], "sample_ids": ["ziUT9IFTkjg", "u0TrcHhkPQ"], "start_seconds": ["10", "20"], "properties": ["chirp, bell, ring", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a frog vocalizes while birds chirp", "a car accelerates and wind blows"], "sample_ids": ["vMf1dLD6Sng", "u0TrcHhkPQ"], "start_seconds": ["6", "20"], "properties": ["frog, bird, vocalize", "accelerates, wind, blows"], "captions_pred_video": ["a frog in a pond with pink flowers in the background", null], "captions_pred_audio": ["a frog croaks loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a drill drills through something then people begin laughing", "a woman speaks as she rubs two objects together"], "sample_ids": ["tEE3MpBt1sg", "vzxHnu-SFEw"], "start_seconds": ["50", "80"], "properties": ["drill, something, laugh", "two objects, woman, speak"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube 
how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a demonstration of a tool?", "label": 0}, {"captions": ["a man speaks while a rooster crows and other people speak in the background", "a man speaks as a car is passing by"], "sample_ids": ["wz7N8YRy74I", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, people", "a, car, pass"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has a rooster in it?", "label": 0}, {"captions": ["birds vocalize and chirp continuously", "three men talk while wind blows and some liquid flows"], "sample_ids": ["w1mlz3Pe4fU", "vJ7JPEFhyLA"], "start_seconds": ["300", "16"], "properties": ["vocalize, chirp, continuously", "three men, wind, flow"], "captions_pred_video": ["of a bird in a cage", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["birds are chirping and 
singing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a bird?", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "wind blowing followed by a zoom"], "sample_ids": ["sYITalLZjj4", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["water, rushes, background, birds", "wind, blow, zoom"], "captions_pred_video": ["two ducks are swimming in the water near each other", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["wind blows and birds chirp", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more calm", "label": 0}, {"captions": ["children cry and people talk", "a person snores loudly multiple times at a close distance"], "sample_ids": ["xLwHe825Zs", "sSMl2vc3ek"], "start_seconds": ["18", "20"], "properties": ["people talk, children cry, people talk", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby cries and a woman speaks", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a baby cries and a woman moans", "birds chirp, a woman speaks, and insects buzz"], "sample_ids": ["smDKStoHBJo", "t97k0cejSQE"], "start_seconds": ["0", "250"], "properties": ["a, cry, woman", "sound, chirp, buzz"], "captions_pred_video": ["a man holding a crying baby in his arms", "a bee on a purple thistle flower"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a bee buzzes and a woman speaks"], "question": "which entity has a higher pitch", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["t69a8aRKhmc", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["a, b, c", "background, birds, rustling"], "captions_pred_video": ["footage is blurry and out of focus", null], 
"captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "birds are chirping and a chime is ringing "], "question": "which entity has a bird in the background?", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["vZAw4apG0Es", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["background, clock, ticktocks", "beeps, hit, woman"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a clock is ticking and people are talking", "a beep sounds followed by a child speaking"], "question": "which entity has a clock ticking in the background?", "label": 0}, {"captions": ["a man speaks then multiple motorcycles pass by", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["zcDwZ6W7E3E", "ukg5L09Wpvo"], "start_seconds": ["180", "150"], "properties": ["a, man, speak", "clickety-clack, train, whistle"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["a motor vehicle roars, drowning 
out people speaking in the background", "a infant makes noise and is excited"], "sample_ids": ["s4Uz1Ffgo04", "wIJK3-5y0kA"], "start_seconds": ["100", "30"], "properties": ["roars, background, people speaking", "noise, excited, infant"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a baby cries and a woman speaks"], "question": "which entity is quieter", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "water splashes as an animal walks through"], "sample_ids": ["zl9Dqx-j7q4", "w1ir-sZ3Im8"], "start_seconds": ["6", "90"], "properties": ["engine, laugh, loud", "animal, water, splashes"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a jet engine roars ", "water splashes and gurgles as people speak"], "question": "which entity is more quiet", "label": 1}, {"captions": ["birds twitter and chirp and clatter", "a clock ticktocks"], "sample_ids": ["yeFvk9x0wWI", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["chirp, twitter, clatter", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a chime of a clock followed by various tones of ticking with come clinking", "a man speaks, another man speaks, and a small bell dings"], "sample_ids": ["uqFtmnhuqA8", "t69a8aRKhmc"], "start_seconds": ["30", "30"], "properties": ["a, b, c", "a, b, c"], "captions_pred_video": ["shows a clock on the wall of a room with a person standing in front of it", "footage is 
blurry and out of focus"], "captions_pred_audio": ["mechanisms are ticking and a hammer is striking ", "a man is speaking and birds are chirping in the background "], "question": "which entity has a ding?", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "small dogs yip and bark sharply"], "sample_ids": ["vqZuVbG6-HI", "v-wcQf4BDY0"], "start_seconds": ["130", "120"], "properties": ["background, male, female", "bark, yip, sharply"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a dog barks and growls"], "question": "which entity is more aggressive", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "an insect buzzes around continuously"], "sample_ids": ["vms5XGTDVQc", "v25l1jef3JY"], "start_seconds": ["220", "0"], "properties": ["paper, crumpled, crinkled", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["paper is crumpled and crinkled", "a fly is buzzing around a microphone "], "question": "which entity is a living thing", "label": 1}, {"captions": ["a motorcycle engine works nearby", "several insects fly while two men talk"], "sample_ids": ["tOSWIURC-4", "s-T9OVOiMLo"], "start_seconds": ["0", "330"], "properties": ["engine, work, nearby", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a lawn mower is running ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a living thing", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "vehicles pass by on a roadway"], 
"sample_ids": ["y2bVZ7rz-5M", "tgbONvsP47Y"], "start_seconds": ["280", "0"], "properties": ["motor noise, horn, siren", "pass, vehicle, roadway"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "some tunes played by whistling"], "sample_ids": ["tQWGZLItBXk", "u6BnG6YZqJ4"], "start_seconds": ["170", "0"], "properties": ["voice, music, whoosh", "tune, play, whistling"], "captions_pred_video": ["worms revolution screenshots", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a vehicle engine runs and wind blows before women yell", "waves crash against a shoreline and people speak"], "sample_ids": ["w5W5Kqtc8E", "yFB25fqfU8I"], "start_seconds": ["100", "300"], "properties": ["wind, blow, vehicle", "wave, crash, shoreline"], "captions_pred_video": [null, "footage of a person surfing in the ocean"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more likely to be in a storm", "label": 1}, {"captions": ["a man speaks as a car is passing by", "someone is typing on a computer keyboard"], "sample_ids": ["sK4u5T8hW78", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["a, car, pass", "keyboard, type, computer"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 
2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["birds chirp as a bell rings", "music plays and a woman speaks on a radio before gunshots are fired"], "sample_ids": ["ziUT9IFTkjg", "xKB8O8LTs6s"], "start_seconds": ["10", "70"], "properties": ["chirp, bell, ring", "music, radio, gunshots"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["vBHyYJ8pL0", "uZesmtKZGSw"], "start_seconds": ["2", "250"], "properties": ["noise, door, opening", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["mechanisms are ticking and a 
sliding door is opening and closing ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is about cars?", "label": 1}, {"captions": ["a man speaks while water trickles and flows", "people cheer as a vehicle engine revs"], "sample_ids": ["sapQIQUhFc", "xjhAnI2q6hM"], "start_seconds": ["280", "6"], "properties": ["water, trickles, flow", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["an infant crying and a woman speaking with some distant murmuring", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["smDKStoHBJo", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["a, infant, speaking", "two men, woman, birds"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a male speaks over some small clicks", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["uXxVebHsGZ8", "ziUT9IFTkjg"], "start_seconds": ["30", "10"], "properties": ["male, clicks, speak", "background, birds, rustling"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man speaks and types on a computer keyboard", "birds are chirping and a chime is ringing "], "question": "which entity has a background of birds chirping", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "a toilet flushes and water drains"], "sample_ids": ["yNtRmrn0io8", "sfAvvZwdLCY"], 
"start_seconds": ["210", "20"], "properties": ["storm, distance, strike", "water drains, flushes, water"], "captions_pred_video": ["footage of a house in the middle of the night", "footage of the toilet in the bathroom"], "captions_pred_audio": ["rain falls and thunder roars", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["an aircraft engine runs", "a horn rings out as a machine runs by"], "sample_ids": ["yLCORCnd35Q", "slZLHwNbbt4"], "start_seconds": ["0", "300"], "properties": ["engine, aircraft, runs", "a, horn, run"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a machine?", "label": 1}, {"captions": ["a duck quacks continuously", "vehicles pass by on a roadway"], "sample_ids": ["vh30P49Po6s", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["quacks, continuously, duck", "pass, vehicle, roadway"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a duck is quacking loudly", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["an adult woman speaks over chopping and silverware noises", "a person snores loudly multiple times at a close distance"], "sample_ids": ["yYJksgsxx5U", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["audio, woman, silverware", "loud, multiple, distance"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", null], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a person snores 
hilariously while someone laughs", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["sSMl2vc3ek", "uEU-Hg5MTN8"], "start_seconds": ["20", "27"], "properties": ["a person, laughs, snores", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and a baby is crying"], "question": "which entity is a person?", "label": 0}, {"captions": ["pigeons vocalize and birds chirp", "a person snores loudly multiple times at a close distance"], "sample_ids": ["uiS58TNyUiw", "sSMl2vc3ek"], "start_seconds": ["430", "20"], "properties": ["vocalize, bird, chirp", "loud, multiple, distance"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["rwTERCUno", "su6FAOcOA8c"], "start_seconds": ["90", "4"], "properties": ["engine, idle, sputter", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["an engine is idling and vibrating", "a woman is speaking and a subway train is moving "], "question": "which entity is a bus?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "race cars go around a track as a man commentates"], "sample_ids": ["wz7N8YRy74I", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["rooster, crow, background, men", "car, track, man"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden 
monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["a man speaks in the background while a slow tick repeats", "vehicles pass by on a roadway"], "sample_ids": ["vZAw4apG0Es", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["background, tick, repeat", "pass, vehicle, roadway"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a clock is ticking and people are talking", "a car is driving on the road "], "question": "which entity has a lot of vehicles passing by on a roadway?", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "people speak as gunfire rings out"], "sample_ids": ["t8CV69hcvF0", "wqTCwqVRDlk"], "start_seconds": ["210", "80"], "properties": ["person, sneeze, follow", "gunfire, ring, speak"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman sneezes and speaks", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "an infant crying as a woman laughs"], "sample_ids": ["zcDwZ6W7E3E", "xhmRY9yhC7c"], "start_seconds": ["180", "20"], "properties": ["man, speak, motorcycles", "a, laugh, infant"], 
"captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a siren comes to life as a horn blares"], "sample_ids": ["y2bVZ7rz-5M", "u--KhUW8l1Y"], "start_seconds": ["280", "0"], "properties": ["motor noise, horn, siren", "horn, siren, life"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "a firefighter spraying water from a fire hydrant at night"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a fire truck siren blares and a horn blows "], "question": "which entity has a horn that is blaring?", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "water is sprayed across a hard surface"], "sample_ids": ["uEU-Hg5MTN8", "sQwlkXjQabo"], "start_seconds": ["27", "10"], "properties": ["a woman, laughs, animal", "water, spray, surface"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "an airplane engine runs"], "sample_ids": ["soTOh3zYJfY", "yVPZ2MNWpms"], "start_seconds": ["40", "0"], "properties": ["vehicle, skid, tires", "engine, airplane, runs"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a car is driving by on the road "], "question": "which 
entity is a vehicle", "label": 0}, {"captions": ["some liquid flows while a woman laughs and man talks", "a race car approaches quickly and slows down squealing tires"], "sample_ids": ["vddP56-ogds", "sEprKHm8Sj8"], "start_seconds": ["30", "90"], "properties": ["liquid, laughs, man", "car, tires, slows"], "captions_pred_video": [null, "rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a clock ticktocks and sounds an alarm then a man laughs", "a male speaks and another male speaks"], "sample_ids": ["xV7Mg1QucSc", "viuTg1M-dqg"], "start_seconds": ["14", "30"], "properties": ["alarm, ticktocks, laughs", "two males, speaking, male"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more males speaking", "label": 1}, {"captions": ["an animal growls followed by birds chirping", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["y2ZBGpgbhHM", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["animal, growl, bird", "men, talk, cars"], "captions_pred_video": [null, "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet 
holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["birds chirping and a dog panting", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more active", "label": 1}, {"captions": ["food is frying while a woman speaks", "someone snores nearby"], "sample_ids": ["yhQ2Lg-7qDY", "spJCm8tD9Zo"], "start_seconds": ["130", "90"], "properties": ["food, woman, speak", "someone snores, nearby, someone"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a person is snoring loudly"], "question": "what is a person doing in the first picture?", "label": 0}, {"captions": ["a child and woman laughs and the woman speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["uPDn2BFTHk", "zj2R0XoFr5k"], "start_seconds": ["140", "50"], "properties": ["woman, laughs, speaks", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman speaks while a helicopter flies overhead "], "question": "which entity shows a child and woman laughs and the woman speaks?", "label": 0}, {"captions": ["a girl talking, laughing and sneezing noise", "an engine runs loudly"], "sample_ids": ["y4tPJXBKDig", "vqZuVbG6-HI"], "start_seconds": ["20", "130"], "properties": ["a, noise, talk", "loud, engine, run"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", 
"a lawn mower is running and men are speaking "], "question": "which noise is louder", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a woman speaks and is crumpling paper"], "sample_ids": ["uEU-Hg5MTN8", "xvDdE3zNf8Y"], "start_seconds": ["27", "120"], "properties": ["a woman, laughs, animal", "A, crumple, paper"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "of a woman in a white shirt and glasses holding a purple tie"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman speaks and crumples paper"], "question": "which woman is crumpling paper", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a vehicle engine runs and someone speaks"], "sample_ids": ["xKB8O8LTs6s", "zF8yoL0rkbI"], "start_seconds": ["70", "30"], "properties": ["music, radio, gunshots", "engine, run, someone"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of the traffic on the street at night"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "the wind is blowing hard and water is splashing"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a woman speaks with water running", "propeller rearing loudly with some male and female voices interspersed in the background"], "sample_ids": ["wTideSjRFS0", "vqZuVbG6-HI"], "start_seconds": ["30", "130"], "properties": ["water, running, woman", "background, male, female"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage is blurry because it's raining outside"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a lawn mower is running and men are speaking "], "question": "which entity has a woman speaking with water running in the background?", "label": 0}, {"captions": ["a train horn blares as a train passes, then fades", 
"people speak as gunfire rings out"], "sample_ids": ["zVacuqSb4LI", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["blares, fades, train", "gunfire, ring, speak"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a man is speaking and a gun is fired"], "question": "which entity is more quiet", "label": 0}, {"captions": ["dogs barking and whimpering", "someone whistles a tune"], "sample_ids": ["tIY7qOV3rEM", "sIXTftIuUgw"], "start_seconds": ["0", "90"], "properties": ["barking, whimpering, dog", "someone, tune, whistle"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", null], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a person whistling a song"], "question": "which entity is a human", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "birds chirp and objects are moved around"], "sample_ids": ["vBslzh7saPw", "yPUYU6t3rwo"], "start_seconds": ["90", "370"], "properties": ["power, scream, increase", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a pickup truck carrying a large 
object down the road", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a jet engine roars and accelerates ", "insects buzz and a man speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "motors runs briefly and tires screech"], "sample_ids": ["sZvwOuuPGP0", "yRx9txMcBl0"], "start_seconds": ["50", "40"], "properties": ["engine, diesel, truck", "motors, tires, screech"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "in 10 words or less the video is about a red mustang driving down a street at night in grand theft auto 5 gta 5, gta 5 mods, gta 5 cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta 5 mods cars, gta"], "captions_pred_audio": ["a medium engine is running ", "a car is revving its engine and skidding "], "question": "which entity has a continuous running engine", "label": 0}, {"captions": ["a clock ticktocks in wind", "a duck quacks loudly and continuously"], "sample_ids": ["yVumC9TGknc", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, wind", "loud, continuous, quacks"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a series of beeps and chirps", "a duck is quacking loudly"], "question": "which entity is quieter", "label": 0}, {"captions": ["animals bleat and cry out and then a woman speaks", "a stream of water flows as people talk and wind blows"], 
"sample_ids": ["yZp6xizR0yU", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["animal, bleat, cry", "stream, water, flow"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "footage is blurry and out of focus"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a man is speaking with wind noise in the background "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a man talks as several small engines run", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["u9A6VZQCZpU", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["a, man, talk", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking while a race car is revving and accelerating ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is about a boy flying an airplane?", "label": 1}, {"captions": ["a bird is chirping and tweeting a bird song", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["wPz6QRAkEb4", "zFjIWfSD-4"], "start_seconds": ["60", "410"], "properties": ["chirps, tweets, song", "People, motor, brakes"], "captions_pred_video": ["a bird in a cage on top of a pole", null], "captions_pred_audio": ["birds are chirping in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is not a bird?", "label": 1}, {"captions": ["pigeons vocalize and birds chirp", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["uiS58TNyUiw", "zFjIWfSD-4"], "start_seconds": ["430", "410"], "properties": ["vocalize, bird, chirp", "People, motor, brakes"], "captions_pred_video": ["of the pigeon in the cage", null], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "a man is speaking while a car is driving and a 
ticking sound is heard "], "question": "which entity is not a symphony", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "an airplane engine spools and people speak"], "sample_ids": ["xKB8O8LTs6s", "wTjoRj1se3U"], "start_seconds": ["70", "390"], "properties": ["music, gunfire, explosion", "airplane, engine, spool"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a jet engine is running and people are talking"], "question": "which entity is a video of an airplane engine spooling?", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a machine beeps continuously"], "sample_ids": ["wnpJndXuxLc", "y682ml90jGw"], "start_seconds": ["50", "11"], "properties": ["blows, vehicle, train", "beeps, machine, continuously"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["speaking following by laughing and clapping", "water pouring and bubbling"], "sample_ids": ["u2f5NpsoHBg", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["person, laugh, clap", "water, bubbles, pouring"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to 
do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a baby cries and wails as an adult female speaks", "wind blows as people chatter quietly"], "sample_ids": ["zliInBdC98Y", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["a, baby, cries, wails", "wind, chatter, people"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "footage is blurry and out of focus"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a small engine spits as it runs", "paper is crumpling consistently"], "sample_ids": ["sZvwOuuPGP0", "v5cSxLaHADY"], "start_seconds": ["50", "0"], "properties": ["spits, engine, runs", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a medium engine is running ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["yZmhM1HcsyE", "w5W5Kqtc8E"], "start_seconds": ["4", "100"], "properties": ["engine, roar, water", "wind, blow, vehicle"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", null], 
"captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a girl talking, laughing and sneezing noise", "a person snores loudly multiple times at a close distance"], "sample_ids": ["y4tPJXBKDig", "sSMl2vc3ek"], "start_seconds": ["20", "20"], "properties": ["a, noise, talk", "loud, multiple, distance"], "captions_pred_video": ["footage of the woman wiping her nose with a tissue", null], "captions_pred_audio": ["a woman is speaking and coughing with background noise and breathing ", "a person snoring loudly"], "question": "which noise is louder", "label": 1}, {"captions": ["a dog barks and whimpers", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["sShpyu2l4YQ", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "male, duck, laugh"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["a car speeding up in the distance", "people applaud and hoot and chat quietly"], "sample_ids": ["u0TrcHhkPQ", "wwyfGO2J4"], "start_seconds": ["20", "90"], "properties": ["distance, car, speed", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "people are clapping and speaking with background noise "], "question": "which entity is moving faster", "label": 0}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "a woman speaks happily and an animal chirps"], "sample_ids": ["ul60S8TXDA8", "uWAAAL4CIoc"], "start_seconds": ["60", "0"], "properties": ["sound, distance, bell", "a woman, chirps, animal"], "captions_pred_video": ["game the legend of zelda ocarina of time on 
nintendo 64", null], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a woman is speaking and a dog is barking "], "question": "which entity is more quiet", "label": 1}, {"captions": ["long loud burping by a man", "water flows as men speak and yell"], "sample_ids": ["xmiUIOhtZyQ", "vJ7JPEFhyLA"], "start_seconds": ["60", "16"], "properties": ["loud, burp, man", "water, flow, men"], "captions_pred_video": ["homer simpson drinking a beer", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a person burps and music plays in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a man speaking and yelling?", "label": 1}, {"captions": ["a motor noise is accompanied by a door opening and closing", "a woman speaks as she rubs two objects together"], "sample_ids": ["vBHyYJ8pL0", "vzxHnu-SFEw"], "start_seconds": ["2", "80"], "properties": ["noise, door, opening", "two objects, woman, speak"], "captions_pred_video": [null, "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["mechanisms are ticking and a 
sliding door is opening and closing ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["an engine sputters followed by a car zooming by", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["u5RmF3c3Aw", "y2bVZ7rz-5M"], "start_seconds": ["60", "280"], "properties": ["engine, car, zoom", "motor noise, horn, siren"], "captions_pred_video": [null, "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a race car accelerates and skids with wind noise in the background ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a car zooming by", "label": 0}, {"captions": ["people converse in the distance as a clock ticks", "a person is whistling"], "sample_ids": ["vZAw4apG0Es", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["people, clock, converse", "person, whistling, person"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a person whistling a song"], "question": "which person is whistling", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["zofjfKhqLk8", "uYT5gxnyMWM"], "start_seconds": ["10", "50"], "properties": ["noise, stop, motor", "a, scream, girl"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a woman is speaking and a baby is crying"], "question": "which entity has more noise", "label": 1}, {"captions": ["bird squawks are accompanied by a man and woman speaking", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": 
["wqZ135Ssz0", "vlS6YMeWAPo"], "start_seconds": ["60", "40"], "properties": ["man, woman, squawks", "sheep, baa, birds"], "captions_pred_video": [null, "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a goat bleats and birds chirp"], "question": "which entity has more animals", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vlJS7LN2XyM", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["background, clocks, ticking", "harsh, wind, blows"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking with wind noise in the background "], "question": "which entity is more calm", "label": 0}, {"captions": ["men speak and a nozzle sprays liquid", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["wRV8yMk886E", "w34HjHr6gAY"], "start_seconds": ["0", "30"], "properties": ["liquid, spray, nozzle", "beeps, hit, woman"], "captions_pred_video": ["two cars are parked in a parking lot at night", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a beep sounds followed by a child 
speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["an infant crying as a woman laughs", "a man speaks as a motor runs in the background"], "sample_ids": ["xhmRY9yhC7c", "xZepNM9qcRA"], "start_seconds": ["20", "30"], "properties": ["a, laugh, infant", "background, motor, run"], "captions_pred_video": ["of a baby crying in a baby bouncer", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is a person", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a child speaks in closed space"], "sample_ids": ["sK4u5T8hW78", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["a, car, pass", "child, space, speak"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a woman speaks and taps on a hard surface before running tap water"], "sample_ids": ["sQwlkXjQabo", "wvKpEYswXO0"], "start_seconds": ["10", "150"], "properties": ["water, spray, surface", "water, tap, run"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "of the person preparing food in the kitchen"], "captions_pred_audio": ["spraying 
followed by silence", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is a source of water", "label": 1}, {"captions": ["a cat meows and children speak", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["x5cuQjOdM3E", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["cat, speak, children", "animal, grunts, snorts"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a cat meows and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity is more active", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "a horn blasts as warning bells ring"], "sample_ids": ["w34HjHr6gAY", "zgUgkpk78xU"], "start_seconds": ["30", "70"], "properties": ["beeps, squawk, child speaking", "horn, bells, ring"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a train blows its horn as it speeds down the tracks "], "question": "which entity is a warning", "label": 
1}, {"captions": ["a woman speaks and laughs and an animal grunts and snorts", "a duck quacks continuously"], "sample_ids": ["uEU-Hg5MTN8", "vh30P49Po6s"], "start_seconds": ["27", "30"], "properties": ["animal, grunts, snorts", "quacks, continuously, duck"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a duck is quacking loudly"], "question": "which animal is speaking", "label": 1}, {"captions": ["a vehicle engine runs as a siren and horn sound", "an infant crying as a woman laughs"], "sample_ids": ["u--KhUW8l1Y", "xhmRY9yhC7c"], "start_seconds": ["0", "20"], "properties": ["sound, vehicle, horn", "a, laugh, infant"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a helicopter engine runs continuously"], "sample_ids": ["vhJWZheqaE", "ugHJF0hfYkg"], "start_seconds": ["0", "10"], "properties": ["water drains unevenly, toilet flushes, water drains", "engine, running, continuously"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a toilet is flushed", "a helicopter is flying overhead "], "question": "which entity is not running continuously?", "label": 0}, {"captions": ["a person is burping then speaks and laughs", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["wAAkbZToh8", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["burp, laugh, speak", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man burps and a woman speaks", "a woman 
is speaking and a baby is crying"], "question": "which entity is a person?", "label": 0}, {"captions": ["a woman speaks as frying food sizzles", "a telephone rings followed by a woman talking"], "sample_ids": ["wTideSjRFS0", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["food, sizzle, woman", "ring, talk, woman"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a dial tone sounds followed by a woman speaking"], "question": "which woman is talking", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "people speak as gunfire rings out"], "sample_ids": ["u7C-AEBQM", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["ticks, rhythmic, quiet", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking and a gun is fired"], "question": "which entity is more quiet", "label": 0}, {"captions": ["a jet engine spools up and takes off", "a man speaks while water trickles and flows"], "sample_ids": ["vBslzh7saPw", "sapQIQUhFc"], "start_seconds": ["90", "280"], "properties": ["engine, spools, takes", "water, trickles, flow"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a man is speaking and a stream is flowing in the background "], "question": "which entity is a moving object", "label": 0}, {"captions": ["animals bleat and cry out and then a woman speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["yZp6xizR0yU", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["animal, bleat, cry", "rustling, ducks, quack"], "captions_pred_video": ["footage of a woman feeding goats 
in a barn", null], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 0}, {"captions": ["an airplane flies overhead as a woman speaks", "a few ducks quack and scamper and a man speaks"], "sample_ids": ["zj2R0XoFr5k", "w2bYrCVLT60"], "start_seconds": ["50", "120"], "properties": ["airplane, fly, overhead", "ducks, speak, quack"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "of the ducks drinking from a pink pool in the grass"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "ducks are quacking and a man is speaking"], "question": "which entity is speaking", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["y8WEcpOlT3I", "wz7N8YRy74I"], "start_seconds": ["40", "30"], "properties": ["harsh, wind, blows", "rooster, crow, background, men"], "captions_pred_video": ["on how to use a sewing machine youtube", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a stream of water flows as people talk and wind blows"], "sample_ids": ["rqfQRErjfk8", "xBxDz0CFVn0"], "start_seconds": ["170", "30"], "properties": ["crowd, cheers, applauds", "stream, water, flow"], "captions_pred_video": ["a man hugging another man in front of an orchestra", "footage is blurry and out of focus"], "captions_pred_audio": ["a crowd of people clapping and cheering", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a clock 
ticktocks and sounds an alarm then a man laughs", "someone is typing on a computer keyboard"], "sample_ids": ["xV7Mg1QucSc", "v0x1odnXtP0"], "start_seconds": ["14", "210"], "properties": ["alarm, ticktocks, laughs", "keyboard, type, computer"], "captions_pred_video": ["a cuckoo clock hanging on the wall", "how to make money on youtube in spanish"], "captions_pred_audio": ["an alarm clock ticks and a woman laughs", "a person is typing on a keyboard"], "question": "which object is used to type on a computer", "label": 1}, {"captions": ["a vehicle accelerates squealing tires", "a horn rings out as a machine runs by"], "sample_ids": ["sd7xVssqlw", "slZLHwNbbt4"], "start_seconds": ["50", "300"], "properties": ["accelerates, tires, squealing", "a, horn, run"], "captions_pred_video": [null, "footage of a train coming down the tracks on a sunny day"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "an infant crying as a woman laughs"], "sample_ids": ["zj2R0XoFr5k", "xhmRY9yhC7c"], "start_seconds": ["50", "20"], "properties": ["airplane, boy, fly", "a, laugh, infant"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["continuous chugging with birds chirping in the background", "a door opens and closes"], "sample_ids": ["xM4joTqDVp4", "vBHyYJ8pL0"], "start_seconds": ["160", "2"], "properties": ["background, chirp, birds", "open, close, door"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's 
smokestack", null], "captions_pred_audio": ["birds are chirping and a train is moving ", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which door is opening and closing", "label": 1}, {"captions": ["an adult man speaks over glass clinking", "a stream of water flows as people talk and wind blows"], "sample_ids": ["u6jIvCtKarQ", "xBxDz0CFVn0"], "start_seconds": ["70", "30"], "properties": ["a, man, speaks", "stream, water, flow"], "captions_pred_video": ["footage of a person using a blender on a stove top", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and dishes are being moved with background noise ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["speaking following by laughing and clapping", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["u2f5NpsoHBg", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["person, laugh, clap", "female, spraying, scream"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a woman is speaking and a baby is crying"], "question": "which entity shows a person speaking", "label": 0}, {"captions": ["a man speaks on a radio as wind blows", "a motorcycle engine works nearby"], "sample_ids": ["tDVADusiIoc", "tOSWIURC-4"], "start_seconds": ["60", "0"], "properties": ["man, radio, blows", "engine, work, nearby"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a lawn mower is running "], "question": "which entity is working", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "a male is speaking and a duck quacks as 
others laugh"], "sample_ids": ["ukxt9I7eMMg", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["continuous, woman, speaking", "male, duck, laugh"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck quacking as others laugh?", "label": 1}, {"captions": ["plastic is tapped on while someone speaks", "wind blowing followed by a zoom"], "sample_ids": ["wvKpEYswXO0", "vr8ZXjEBhMQ"], "start_seconds": ["150", "150"], "properties": ["plastic, tap, speak", "wind, blow, zoom"], "captions_pred_video": ["of the person preparing food in the kitchen", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["a toilet flushes and water drains", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["sfAvvZwdLCY", "xKB8O8LTs6s"], "start_seconds": ["20", "70"], "properties": ["water drains, flushes, water", "music, gunfire, explosion"], "captions_pred_video": ["footage of the toilet in the bathroom", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a toilet is flushed", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is a scene of a toilet flushing and water draining?", "label": 0}, {"captions": ["people converse in the distance as a clock ticks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vZAw4apG0Es", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["people, clock, converse", "applause, audience, yells"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a man is 
talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["an insect buzzes around continuously", "people applaud and hoot and chat quietly"], "sample_ids": ["v25l1jef3JY", "wwyfGO2J4"], "start_seconds": ["0", "90"], "properties": ["buzzes, continuously, insect", "people, applaud, hoot"], "captions_pred_video": ["a black background with a cartoon character in the foreground", null], "captions_pred_audio": ["a fly is buzzing around a microphone ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays"], "sample_ids": ["xKB8O8LTs6s", "sU53zg9Jp7s"], "start_seconds": ["70", "380"], "properties": ["music, radio, gunshots", "a bird chirps, a door bell ringing, a woman gasps"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "a cartoon girl is standing in front of a blue couch"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "birds chirp and a doorbell rings with breathing and music in the background "], "question": "which entity is more calm", "label": 1}, {"captions": ["a dog barks and whimpers", "an infant crying frantically"], "sample_ids": ["sShpyu2l4YQ", "zwOBqeFTgiU"], "start_seconds": ["0", "30"], "properties": ["barks, whimpers, dog", "cry, infant, frantically"], "captions_pred_video": ["the puppies are playing with a toy", "of the baby crying in the car seat"], "captions_pred_audio": ["a dog is barking and growling", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a man speaks as music plays before artillery is fired", 
"some men converse over an engine running"], "sample_ids": ["vcmWSmvti8", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["music, man, fire", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a man speaking as music plays before artillery is fired?", "label": 0}, {"captions": ["a woman speaks over sizzling noise", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["yajyRTUQk3U", "wz7N8YRy74I"], "start_seconds": ["400", "30"], "properties": ["noise, woman, speak", "rooster, crow, background, men"], "captions_pred_video": ["- a woman cooking in the kitchen", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "plastic is tapped on while someone speaks"], "sample_ids": ["uYT5gxnyMWM", "wvKpEYswXO0"], "start_seconds": ["50", "150"], "properties": ["person, spray, yell", "plastic, tap, speak"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is a person speaking over?", "label": 0}, {"captions": ["a man speaks as music plays before artillery is fired", "a man speaks followed by another man speaking outside"], "sample_ids": ["vcmWSmvti8", "viuTg1M-dqg"], "start_seconds": ["30", "30"], "properties": ["music, man, fire", "two men, speak, follow"], "captions_pred_video": [null, 
"footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking music is playing and an explosion is heard ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more people speaking", "label": 1}, {"captions": ["gunshots ring out, a man yells, and more shots follow", "someone is typing on a computer keyboard"], "sample_ids": ["vKrYfzleLB8", "v0x1odnXtP0"], "start_seconds": ["110", "210"], "properties": ["a, ring, gunshots", "keyboard, type, computer"], "captions_pred_video": ["stock footage of a person holding a gun in their hand", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking with background noise and a cap gun is fired ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["winds blows roughly as a vehicle races past", "several insects fly while two men talk"], "sample_ids": ["xjvTpk2Zpr8", "s-T9OVOiMLo"], "start_seconds": ["70", "330"], "properties": ["wind, blows, vehicle", "several, fly, men"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a man is speaking while insects are buzzing in the background "], "question": "which entity is more likely to be in a garden", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "a man speaks followed by another man speaking outside"], "sample_ids": ["tDlfY3nmx1A", "viuTg1M-dqg"], "start_seconds": ["160", "30"], "properties": ["applause, laugh, man", "two men, speak, follow"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a man is speaking with background 
noise and breathing sounds "], "question": "which entity has two men speaking to each other?", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["wy1eKjR7KC0", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["people, talk, distance", "a woman, something, fried"], "captions_pred_video": ["two police officers riding motorcycles down the street", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a woman is speaking while food is frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a harsh wind blows as a man speaks and another man speaks", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["y8WEcpOlT3I", "tiDFTC-5vU"], "start_seconds": ["40", "30"], "properties": ["harsh, wind, blows", "male, duck, laugh"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "a man speaks as a motor runs in the background"], "sample_ids": ["ukg5L09Wpvo", "xZepNM9qcRA"], "start_seconds": ["150", "30"], "properties": ["a train, a horn, a bell", "background, motor, run"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "a gun shoots, followed by water sloshing 
nearby"], "sample_ids": ["wyllXV6PjKo", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["a kid, talk, cry", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a woman speaks and a baby cries", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a clock ticktocks in wind", "wind blows as people chatter quietly"], "sample_ids": ["yVumC9TGknc", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["ticktocks, clock, wind", "wind, chatter, people"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", "footage is blurry and out of focus"], "captions_pred_audio": ["a series of beeps and chirps", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a motorcycle engine works nearby", "water pouring and bubbling"], "sample_ids": ["tOSWIURC-4", "uyRfq-jKPpo"], "start_seconds": ["0", "50"], "properties": ["engine, work, nearby", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a lawn mower is running ", 
"water is running from a faucet"], "question": "which is a liquid", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["sTpirNYo8vQ", "w34HjHr6gAY"], "start_seconds": ["30", "30"], "properties": ["a, tone, fast", "beeps, hit, woman"], "captions_pred_video": ["of a man taking a selfie on a bus", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a beep sounds followed by a child speaking"], "question": "which entity has a woman talking?", "label": 1}, {"captions": ["multiple birds chirp and an animal grunts", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tDlysoZiA1I", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["animal, grunt, multiple", "female, spraying, scream"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["young female child snoring and breathing deeply", "a person sniffs and sneezes"], "sample_ids": ["sAam2NqGhLY", "uRlbY6aoBU"], "start_seconds": ["20", "0"], "properties": ["snoring, breathing, child", "sneezes, person, sniffs"], "captions_pred_video": ["of a little girl sleeping on a couch", null], 
"captions_pred_audio": ["a person is snoring", "a man is sneezing "], "question": "which entity is a person", "label": 1}, {"captions": ["a person snoring", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["t8tv5YRMJUg", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["a person, snore, loud", "a, scream, girl"], "captions_pred_video": ["of a man getting his face licked by another man", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person sniffs and breathes heavily", "a woman is speaking and a baby is crying"], "question": "which entity is louder", "label": 1}, {"captions": ["a clicking followed by some people laughing and a kid speaking", "an infant crying as a woman laughs"], "sample_ids": ["vz8868znkVQ", "xhmRY9yhC7c"], "start_seconds": ["60", "20"], "properties": ["audio, click, kid speaking", "a, laugh, infant"], "captions_pred_video": ["a video of a plane flying over a cloudy sky", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a baby is laughing and breathing with background noise ", "a baby cries and a woman speaks"], "question": "which entity is a video", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zgUgkpk78xU", "yajyRTUQk3U"], "start_seconds": ["70", "400"], "properties": ["clinking, humming, horn", "a woman, something, fried"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman is speaking while food is 
frying in the background"], "question": "which entity is about cooking?", "label": 1}, {"captions": ["a large crowd cheers and applauds", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["rqfQRErjfk8", "w5W5Kqtc8E"], "start_seconds": ["170", "100"], "properties": ["crowd, cheers, applauds", "wind, blow, vehicle"], "captions_pred_video": ["a man hugging another man in front of an orchestra", null], "captions_pred_audio": ["a crowd of people clapping and cheering", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is more likely to be at a sporting event", "label": 0}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "dishes cling together then a man begins to speak"], "sample_ids": ["vbZ-0lGPneg", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["a woman, a television program, a bird", "cling, speak, dishes"], "captions_pred_video": ["of a man holding a baby duck in his hands", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "mechanisms are operating and water is splashing "], "question": "which entity has a bird?", "label": 0}, {"captions": ["a church bell rings several times", "a car speeding up in the distance"], "sample_ids": ["sUVVjE3Ucp8", "u0TrcHhkPQ"], "start_seconds": ["0", "20"], "properties": ["ring, bell, several", "distance, car, speed"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", null], "captions_pred_audio": ["a church bell is ringing ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "a jet engine spools up and takes off"], "sample_ids": ["tIY7qOV3rEM", "vBslzh7saPw"], "start_seconds": ["0", "90"], "properties": ["animal, bark, dog, barking, small, goat, 
bleating", "engine, spools, takes"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "a pickup truck carrying a large object down the road"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a jet engine roars and accelerates "], "question": "which entity is not a living thing", "label": 1}, {"captions": ["a telephone rings followed by a woman talking", "a man speaks as a car is passing by"], "sample_ids": ["tGcFnX0GHI", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["ring, talk, woman", "a, car, pass"], "captions_pred_video": [null, "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a dial tone sounds followed by a woman speaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a woman talking?", "label": 0}, {"captions": ["a vehicle engine revs as the vehicle passes", "people cheer as a vehicle engine revs"], "sample_ids": ["yDoT73BWsdA", "xjhAnI2q6hM"], "start_seconds": ["10", "6"], "properties": ["engine, revs, vehicle", "engine revs, vehicle, people"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a truck is revving its engine and a man is speaking "], "question": "which vehicle is revving its engine", "label": 1}, {"captions": ["an infant crying as a woman laughs", "dishes cling together then a man begins to speak"], "sample_ids": ["xhmRY9yhC7c", "sQGXqGcwOTc"], 
"start_seconds": ["20", "3"], "properties": ["a, laugh, infant", "cling, speak, dishes"], "captions_pred_video": ["of a baby crying in a baby bouncer", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a baby cries and a woman speaks", "mechanisms are operating and water is splashing "], "question": "which entity is about a woman and an infant?", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "people speak as gunfire rings out"], "sample_ids": ["vzxHnu-SFEw", "wqTCwqVRDlk"], "start_seconds": ["80", "80"], "properties": ["two objects, woman, speak", "gunfire, ring, speak"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["an engine runs and a man speaks", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["yT5WfYMRr-U", "wqZ135Ssz0"], "start_seconds": ["30", "60"], 
"properties": ["engine, run, man", "two men, woman, birds"], "captions_pred_video": ["a man in a red jacket and sunglasses driving a boat", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a train engine runs and a horn blows", "a infant makes noise and is excited"], "sample_ids": ["zPX9o1uDiI", "wIJK3-5y0kA"], "start_seconds": ["40", "30"], "properties": ["engine, horn, run", "noise, excited, infant"], "captions_pred_video": [null, "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["multiple ducks quack continuously", "several insects fly while two men talk"], "sample_ids": ["wfHeoPDLMaM", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["multiple, quack, continuously", "several, fly, men"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "a man climbing up a tree using a rope to 
reach the top of the tree"], "captions_pred_audio": ["ducks are quacking", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a video of a single event", "label": 1}, {"captions": ["a person speaks briefly", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["zOZleIRqZm4", "wDVMhEdTiVw"], "start_seconds": ["80", "30"], "properties": ["person, talk, brief", "gun, shoot, water"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause injury", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "someone is typing on a computer keyboard"], "sample_ids": ["wvKpEYswXO0", "v0x1odnXtP0"], "start_seconds": ["150", "210"], "properties": ["water, tap, run", "keyboard, type, computer"], "captions_pred_video": ["of the person preparing food in the kitchen", "how to make money on youtube in spanish"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a person is typing on a keyboard"], "question": "which action is performed on a computer", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["y1saVTXsKwc", "xBxDz0CFVn0"], "start_seconds": ["80", "30"], "properties": ["a, dog, talk", "stream, water, flow"], "captions_pred_video": ["a dog playing with a pink ball", "footage is blurry and out of focus"], "captions_pred_audio": ["a dog barks and a man speaks", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "a man speaks over a radio 
as wind blows and water splashes"], "sample_ids": ["tIY7qOV3rEM", "tDVADusiIoc"], "start_seconds": ["0", "60"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "water, radio, man"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a power tool runs and touches a surface", "a man speaks as a motor runs in the background"], "sample_ids": ["zfvPRf3chY", "xZepNM9qcRA"], "start_seconds": ["290", "30"], "properties": ["power tool, run, touch", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is not touching a surface", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "roadway noise occurs and a truck accelerates"], "sample_ids": ["wTjoRj1se3U", "tgbONvsP47Y"], "start_seconds": ["390", "0"], "properties": ["engine, run, people", "noise, truck, accelerate"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a jet engine is running and people are talking", "a car is driving on the road "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "paper is crumpling consistently"], "sample_ids": ["w2M4i1mklOA", "v5cSxLaHADY"], "start_seconds": ["30", "0"], "properties": ["loud, chime, bell", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of an antique clock", "footage of the 
person holding a pair of scissors"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "a clock ticks quietly and rhythmically"], "sample_ids": ["y8WEcpOlT3I", "u7C-AEBQM"], "start_seconds": ["40", "30"], "properties": ["wind, speak, buffeting", "ticks, rhythmic, quiet"], "captions_pred_video": ["on how to use a sewing machine youtube", null], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a ticktock of a clock"], "question": "which entity is quieter", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "music plays and a woman speaks on a radio before gunshots are fired"], "sample_ids": ["vddP56-ogds", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["liquid, laughs, man", "music, radio, gunshots"], "captions_pred_video": [null, "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vs65y4qmyBE", "vbZ-0lGPneg"], "start_seconds": ["340", "30"], "properties": ["engine, run, man", "a woman, a television program, a bird"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["wind blows strongly and a young man speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": 
["vs65y4qmyBE", "xBxDz0CFVn0"], "start_seconds": ["340", "30"], "properties": ["wind, blows, strongly", "stream, water, flow"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage is blurry and out of focus"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["multiple motorcycles pass by as a man speaks", "leaves rustling followed by a small bell chiming as birds chirp in the background"], "sample_ids": ["zcDwZ6W7E3E", "ziUT9IFTkjg"], "start_seconds": ["180", "10"], "properties": ["man, speak, motorcycles", "background, birds, rustling"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", null], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "birds are chirping and a chime is ringing "], "question": "which entity is a video of a man speaking?", "label": 0}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "someone is typing on a computer keyboard"], "sample_ids": ["sG7TyPnFDR0", "v0x1odnXtP0"], "start_seconds": ["180", "210"], "properties": ["beeps, machine, smoke alarm", "keyboard, type, computer"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a person is typing on a keyboard"], "question": "which entity is typing on a computer keyboard?", "label": 1}, {"captions": ["a door slams shut and an object moves on a hard surface", "wind blows as people chatter quietly"], "sample_ids": ["zkKdxzNC97Y", "xBxDz0CFVn0"], "start_seconds": ["27", "30"], "properties": ["hard, surface, door", "wind, chatter, people"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "footage 
is blurry and out of focus"], "captions_pred_audio": ["a door is opened and closed", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a car accelerates and wind blows", "a man speaks followed by another man speaking outside"], "sample_ids": ["u0TrcHhkPQ", "viuTg1M-dqg"], "start_seconds": ["20", "30"], "properties": ["accelerates, wind, blows", "two men, speak, follow"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["an audience gives applause", "paper is crumpling consistently"], "sample_ids": ["x6iCUDmRpKQ", "v5cSxLaHADY"], "start_seconds": ["38", "0"], "properties": ["applause, audience, give", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a black background with the moon and stars in the sky", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a group of people are clapping and cheering", "paper is crumpled and crinkled"], "question": "which is not a person", "label": 0}, {"captions": ["a woman speaks and then a man speaks", "a train horn blows as it passes by"], "sample_ids": ["vbpKkWvfOu4", "zVacuqSb4LI"], "start_seconds": ["560", "30"], "properties": ["a, man, speaks", "horn, blows, train"], "captions_pred_video": ["2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike 
amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a woman is speaking and a man is speaking", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which is not a person speaking", "label": 1}, {"captions": ["birds chirp as a train approaches", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["xM4joTqDVp4", "yDoT73BWsdA"], "start_seconds": ["160", "10"], "properties": ["bird, chirp, train", "engine, revs, vehicle"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["birds chirp quietly and an adult man speaks", "an infant crying frantically"], "sample_ids": ["zuua6-5goWw", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["birds, chirp, quiet, man, speaks", "cry, infant, frantically"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 
10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "of the baby crying in the car seat"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a baby cries loudly"], "question": "which entity is quieter", "label": 0}, {"captions": ["a baby cries and wails as an adult female speaks", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["zliInBdC98Y", "yajyRTUQk3U"], "start_seconds": ["30", "400"], "properties": ["a, baby, cries, wails", "a woman, something, fried"], "captions_pred_video": ["of a cute little girl playing with her hair in the bathroom", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a baby cries and a woman speaks", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["an emergency siren wails as it passes", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vGj1XLJvNrw", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["wails, wails, pass", "female, spraying, scream"], "captions_pred_video": ["footage of a police car driving down a city street", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a sleeping person snores and wheezes", "a infant makes noise and is excited"], "sample_ids": ["spJCm8tD9Zo", "wIJK3-5y0kA"], "start_seconds": ["90", "30"], "properties": ["snores, wheezes, sleeps", "noise, excited, infant"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a person is 
snoring loudly", "a baby cries and a woman speaks"], "question": "which entity is more quiet", "label": 0}, {"captions": ["animals bleat and moo as a person speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["tPJvjq9QePY", "tdWhHV3X25Q"], "start_seconds": ["40", "60"], "properties": ["animal, bleat, moo", "applause, audience, yells"], "captions_pred_video": ["a dog and a sheep in a barn", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a baby cries and a man speaks", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["someone sprays liquid onto a hard surface", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sQwlkXjQabo", "y8WEcpOlT3I"], "start_seconds": ["10", "40"], "properties": ["liquid, surface, spray", "harsh, wind, blows"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "on how to use a sewing machine youtube"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking with wind noise in the background "], "question": "which entity is not a liquid", "label": 1}, {"captions": ["children cry and people talk", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["xLwHe825Zs", "vfYTJq7nU"], "start_seconds": ["18", "130"], "properties": ["people talk, children cry, people talk", "rustling, ducks, quack"], "captions_pred_video": [null, null], "captions_pred_audio": ["a baby cries and a woman speaks", "a duck quacks and a woman speaks"], "question": "which entity is about animals?", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "a car speeding up in the distance"], "sample_ids": ["tiDFTC-5vU", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["male, duck, laugh", "distance, car, speed"], 
"captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a race car accelerates and revs its engine "], "question": "which is moving faster", "label": 0}, {"captions": ["a vehicle engine revs as the vehicle passes", "paper is crumpling consistently"], "sample_ids": ["yDoT73BWsdA", "v5cSxLaHADY"], "start_seconds": ["10", "0"], "properties": ["engine, revs, vehicle", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["water quietly rushes by while birds chirp in the background", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["sYITalLZjj4", "uZesmtKZGSw"], "start_seconds": ["30", "250"], "properties": ["water, rushes, background, birds", "men, talk, cars"], "captions_pred_video": ["two ducks are swimming in the water near each other", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["wind blows and birds chirp", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["an engine idles quietly then gradually becomes louder", "a loud snarling engine is followed by a man 
laughing"], "sample_ids": ["vbr9mHKc8WM", "zl9Dqx-j7q4"], "start_seconds": ["40", "6"], "properties": ["noise, loudness, engine", "engine, laugh, loud"], "captions_pred_video": [null, "footage of a man driving a car in the dark"], "captions_pred_audio": ["an engine is idling", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as crickets sing", "a car accelerates and wind blows"], "sample_ids": ["ryFDPxgDOGc", "u0TrcHhkPQ"], "start_seconds": ["570", "20"], "properties": ["a, crickets, sing", "accelerates, wind, blows"], "captions_pred_video": ["a group of people dressed in camouflage and hunting gear in the dark", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a machine beeps continuously", "rustling with distant murmuring"], "sample_ids": ["y682ml90jGw", "wnNNcxAPwGQ"], "start_seconds": ["11", "0"], "properties": ["beeps, machine, continuously", "sound, distance, rustling"], "captions_pred_video": [null, "footage of a yellow truck doing a burnout on a race track"], "captions_pred_audio": ["a beeping sound is being made ", "a crowd of people are talking and laughing while a skateboard rolls by "], "question": "which entity is quieter", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["ugHJF0hfYkg", "w5W5Kqtc8E"], "start_seconds": ["10", "100"], "properties": ["engine, running, continuously", "wind, blow, vehicle"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", null], "captions_pred_audio": ["a helicopter is flying overhead ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a running engine", "label": 1}, {"captions": ["a duck quacks continuously", "a stream of water runs briefly"], 
"sample_ids": ["vh30P49Po6s", "x-PeY8Yb8M4"], "start_seconds": ["30", "300"], "properties": ["quacks, continuously, duck", "stream, water, run"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a duck is quacking loudly", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a heavy rain falls endlessly"], "sample_ids": ["zofjfKhqLk8", "wP8ZKrlx3oA"], "start_seconds": ["10", "40"], "properties": ["noise, stop, motor", "heavy, rain, fall"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a heavy rain is falling on a surface"], "question": "which entity is falling", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "winds blows roughly as a vehicle races past"], "sample_ids": ["sTpirNYo8vQ", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["a, tone, fast", "wind, blows, vehicle"], "captions_pred_video": ["of a man taking a selfie on a bus", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a jet engine roars and wind blows "], "question": "which entity is not a vehicle?", "label": 0}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["uRExseg-0XI", "zl9Dqx-j7q4"], "start_seconds": ["210", "6"], "properties": ["woman, man, water", "engine, laugh, loud"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "footage of a man driving a car in the dark"], 
"captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["people clap and speak in the distance", "people speak as gunfire rings out"], "sample_ids": ["wwyfGO2J4", "wqTCwqVRDlk"], "start_seconds": ["90", "80"], "properties": ["clap, distance, speak", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vddP56-ogds", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["liquid, laughs, man", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "a man speaks as a car is passing by"], "sample_ids": ["vJ7JPEFhyLA", "sK4u5T8hW78"], "start_seconds": ["16", "30"], "properties": ["three men, wind, flow", "a, car, pass"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], 
"captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "food is frying then a woman speaks"], "sample_ids": ["w34HjHr6gAY", "ukxt9I7eMMg"], "start_seconds": ["30", "30"], "properties": ["beeps, squawk, child speaking", "food, woman, speak"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "footage of a person preparing food on a stool in a kitchen"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a woman is speaking while food is frying in the background "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["people speak softly as food sizzles", "a woman speaks happily and an animal chirps"], "sample_ids": ["yhQ2Lg-7qDY", "uWAAAL4CIoc"], "start_seconds": ["130", "0"], "properties": ["food, sizzle, speak", "a woman, chirps, animal"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", null], "captions_pred_audio": ["a faucet is running and a man is speaking", "a woman is speaking and a dog is barking "], "question": "which entity has a more active animal", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "plastic is tapped on while someone speaks"], "sample_ids": ["vf9xf3vMsGM", "wvKpEYswXO0"], "start_seconds": ["540", "150"], 
"properties": ["A man speaks while turning a water faucet on.", "plastic, tap, speak"], "captions_pred_video": ["of the person washing their hands under the faucet", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a man speaks in the background while a slow tick repeats", "birds chirp and objects are moved around"], "sample_ids": ["vZAw4apG0Es", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["background, tick, repeat", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a clock is ticking and people are talking", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "water is sprayed across a hard surface"], "sample_ids": ["uPDn2BFTHk", "sQwlkXjQabo"], "start_seconds": ["140", "10"], "properties": ["woman, laughs, speaks", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a baby laughs and a woman speaks", "spraying followed by silence"], "question": "which entity is a video of a person speaking?", "label": 0}, {"captions": ["a man speaks as a motor runs in the background", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["xZepNM9qcRA", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["background, motor, run", "gun, shoot, water"], "captions_pred_video": ["a close-up view of the motorcycle's engine and exhaust system", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man speaks while a 
motorcycle revs and accelerates ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "bees buzz and wind blows"], "sample_ids": ["yJ0TePmaOo", "tMJne1a4AFI"], "start_seconds": ["390", "0"], "properties": ["two hard objects, man, speak", "bees buzz, wind blows, bees"], "captions_pred_video": [null, "a swarm of bees on the ground"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "a swarm of bees buzzing around"], "question": "which entity is moving", "label": 1}, {"captions": ["food is frying while a woman speaks", "people speak in the background as a clock ticktocks"], "sample_ids": ["yhQ2Lg-7qDY", "vZAw4apG0Es"], "start_seconds": ["130", "30"], "properties": ["food, woman, speak", "background, clock, ticktocks"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "a clock made out of wood and gears with birds on top of it"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a clock is ticking and people are talking"], "question": "which entity has a clock ticktocking in the background?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "a chime of a clock followed by various tones of ticking with come clinking"], "sample_ids": ["wz7N8YRy74I", "uqFtmnhuqA8"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, men", "a, b, c"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "shows a clock on the wall of a room with a person standing in front of it"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "mechanisms are ticking and a hammer is striking "], "question": "which entity is a clock?", "label": 1}, {"captions": ["insects humming with a dog barking and small goat 
bleating", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["tIY7qOV3rEM", "xKB8O8LTs6s"], "start_seconds": ["0", "70"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "music, gunfire, explosion"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["an infant crying as a woman laughs", "a stream of water runs briefly"], "sample_ids": ["xhmRY9yhC7c", "x-PeY8Yb8M4"], "start_seconds": ["20", "300"], "properties": ["a, laugh, infant", "stream, water, run"], "captions_pred_video": ["of a baby crying in a baby bouncer", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a baby cries and a woman speaks", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["someone whistles briefly", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["uFoga8sHpiw", "wz7N8YRy74I"], "start_seconds": ["90", "30"], "properties": ["sound, duration, pitch", "rooster, crow, background, men"], "captions_pred_video": ["footage of a bird in a cage", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a person whistles a song", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a longer duration", "label": 1}, {"captions": ["propeller rearing loudly with some male and female voices interspersed in the background", "a duck quacks continuously"], "sample_ids": ["vqZuVbG6-HI", "vh30P49Po6s"], "start_seconds": ["130", "30"], "properties": ["background, male, female", "quacks, continuously, duck"], "captions_pred_video": ["footage is blurry because 
it's raining outside", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "a duck is quacking loudly"], "question": "which entity is a duck?", "label": 1}, {"captions": ["a person is snoring while sleeping", "plastic is tapped on while someone speaks"], "sample_ids": ["vJrjSeP17yE", "wvKpEYswXO0"], "start_seconds": ["40", "150"], "properties": ["a person is sleeping, snoring, person", "plastic, tap, speak"], "captions_pred_video": ["a black background with a small plane flying in the sky", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a person snoring loudly", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["a clock ticktocks", "a car speeding up in the distance"], "sample_ids": ["v-g-j2uTByM", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["ticktocks, clock, ticktocks", "distance, car, speed"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", null], "captions_pred_audio": ["a clock is ticking loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a jet engine screams, then increases its power", "winds blows roughly as a vehicle races past"], "sample_ids": ["vBslzh7saPw", "xjvTpk2Zpr8"], "start_seconds": ["90", "70"], "properties": ["power, scream, increase", "wind, blows, vehicle"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a jet engine roars and accelerates ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["a storm rolls by as thunder and lighting strike in the distance", "a woman speaks happily and an animal chirps"], "sample_ids": ["yNtRmrn0io8", 
"uWAAAL4CIoc"], "start_seconds": ["210", "0"], "properties": ["storm, distance, strike", "a woman, chirps, animal"], "captions_pred_video": ["footage of a house in the middle of the night", null], "captions_pred_audio": ["rain falls and thunder roars", "a woman is speaking and a dog is barking "], "question": "which entity is more calm", "label": 1}, {"captions": ["a toilet flushes and water drains unevenly", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vhJWZheqaE", "tiDFTC-5vU"], "start_seconds": ["0", "30"], "properties": ["water drains unevenly, toilet flushes, water drains", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["a toilet is flushed", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking to a duck?", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "an infant crying as a woman laughs"], "sample_ids": ["zcDwZ6W7E3E", "xhmRY9yhC7c"], "start_seconds": ["180", "20"], "properties": ["a, man, speak", "a, laugh, infant"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a gun shoots, followed by water sloshing nearby", "wind blows as people chatter quietly"], "sample_ids": ["wDVMhEdTiVw", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["gun, shoot, water", "wind, chatter, people"], "captions_pred_video": ["a blurry image of trees and water in the forest", "footage is blurry and out of focus"], "captions_pred_audio": ["a gun is fired followed by splashing and a person sneezing", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["men speak and a 
nozzle sprays liquid", "a duck quacks continuously"], "sample_ids": ["wRV8yMk886E", "vh30P49Po6s"], "start_seconds": ["0", "30"], "properties": ["liquid, spray, nozzle", "quacks, continuously, duck"], "captions_pred_video": ["two cars are parked in a parking lot at night", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["continuous sizzling with a woman speaking towards the end", "people cheer as a vehicle engine revs"], "sample_ids": ["ukxt9I7eMMg", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["continuous, woman, speaking", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a stream runs then someone speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["wbHTKEJZyhc", "xjhAnI2q6hM"], "start_seconds": ["20", "6"], "properties": ["stream, run, someone", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background 
video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a child and woman laughs and the woman speaks", "a child speaks in closed space"], "sample_ids": ["uPDn2BFTHk", "yW6FWLSLkx4"], "start_seconds": ["140", "40"], "properties": ["woman, laughs, speaks", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["a duck quacks continuously", "wind blowing followed by a zoom"], "sample_ids": ["vh30P49Po6s", "vr8ZXjEBhMQ"], "start_seconds": ["30", "150"], "properties": ["quacks, continuously, duck", "wind, blow, zoom"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a duck is quacking loudly", "wind blows and a chainsaw cuts through wood "], "question": "which entity is a zoom", "label": 1}, {"captions": ["birds chirp as a train approaches", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["xM4joTqDVp4", "uZesmtKZGSw"], 
"start_seconds": ["160", "250"], "properties": ["bird, chirp, train", "men, talk, cars"], "captions_pred_video": ["footage is of a train station with a train on the tracks and smoke coming out of the train's smokestack", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["birds are chirping and a train is moving ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a drill runs and two people laugh", "a vehicle engine accelerating then running on idle"], "sample_ids": ["tEE3MpBt1sg", "vYkA3cfXp5Q"], "start_seconds": ["50", "30"], "properties": ["two people, laugh, drill", "engine, accelerate, idle"], "captions_pred_video": ["footage is blurry due to the smoke in the air", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["people are laughing breathing and speaking with background noise ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["loud, continuous burping", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["y636gklDioE", "y8WEcpOlT3I"], "start_seconds": ["20", "40"], "properties": ["loud, continuous, burping", "harsh, wind, blows"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a person burps loudly several times", "a man is speaking with wind 
noise in the background "], "question": "which entity is not continuous", "label": 1}, {"captions": ["birds chirp as a bell rings", "a child speaks in closed space"], "sample_ids": ["ziUT9IFTkjg", "yW6FWLSLkx4"], "start_seconds": ["10", "40"], "properties": ["chirp, bell, ring", "child, space, speak"], "captions_pred_video": [null, "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is in a closed space", "label": 1}, {"captions": ["some people speak", "a propeller rotates loudly and intensely"], "sample_ids": ["vbZ-0lGPneg", "ugHJF0hfYkg"], "start_seconds": ["30", "10"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "loud, intense, propeller"], "captions_pred_video": ["of a man holding a baby duck in his hands", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["a clock ticktocks continuously", "water splashes and a motorboat passes as people yell"], "sample_ids": ["vlJS7LN2XyM", "w5W5Kqtc8E"], "start_seconds": ["30", "100"], "properties": ["ticktocks, clock, ticktocks continuously", "water, splashes, motorboat"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the clock are hard to see due to the blurriness of the video", null], "captions_pred_audio": ["a ticktock of a clock", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is more active", "label": 1}, {"captions": ["some light rustling followed by a loud burp and a girl speaking", "people cheer as a vehicle engine revs"], "sample_ids": ["vdoxuJn9lTc", "xjhAnI2q6hM"], "start_seconds": ["40", "6"], 
"properties": ["burp, loud, girl", "engine revs, vehicle, people"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a child speaks followed by a burp", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["yZp6xizR0yU", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["animal, bleat, cry", "engine, laugh, loud"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "a vehicle engine runs while a siren and horn sound"], "sample_ids": ["slZLHwNbbt4", "u--KhUW8l1Y"], "start_seconds": ["300", "0"], "properties": ["a, horn, run", "engine, sound, horn"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "a firefighter spraying water from a fire hydrant at night"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a fire truck siren blares and a horn blows "], "question": "which entity has a horn that rings out as a machine runs by?", "label": 0}, {"captions": ["a baby cries and a woman speaks", "a car accelerates and wind blows"], "sample_ids": ["tMbMDvT50j8", "u0TrcHhkPQ"], "start_seconds": ["12", "20"], "properties": ["a, cry, woman", "accelerates, wind, blows"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a race car accelerates and revs its engine 
"], "question": "which entity is moving", "label": 1}, {"captions": ["a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sU53zg9Jp7s", "zFjIWfSD-4"], "start_seconds": ["380", "410"], "properties": ["a bird chirps, a door bell ringing, a woman gasps", "People, motor, brakes"], "captions_pred_video": ["a cartoon girl is standing in front of a blue couch", null], "captions_pred_audio": ["birds chirp and a doorbell rings with breathing and music in the background ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["paper is repeatedly crumpled and crinkled", "birds chirp and objects are moved around"], "sample_ids": ["vms5XGTDVQc", "yPUYU6t3rwo"], "start_seconds": ["220", "370"], "properties": ["paper, crumpled, crinkled", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["footage of a woman opening a black bag on a table", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["paper is crumpled and crinkled", "insects buzz and a man speaks"], "question": "which entity is more likely to be a video of a person's hands?", "label": 0}, {"captions": ["children speak as a female ask them questions", "a person sniffs and sneezes"], "sample_ids": ["wEBlkGWVWwE", "uRlbY6aoBU"], "start_seconds": ["260", "0"], "properties": ["female, speak, questions", "sneezes, person, sniffs"], "captions_pred_video": ["shows a person writing on the whiteboard", null], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is sneezing "], "question": "which entity is a person", "label": 1}, {"captions": ["a man is filing a hard object", "a telephone rings followed by a woman talking"], "sample_ids": ["vveS8HT7Uog", "tGcFnX0GHI"], "start_seconds": ["100", 
"0"], "properties": ["a man, hard, object", "ring, talk, woman"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", null], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a dial tone sounds followed by a woman speaking"], "question": "which entity is talking", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "paper folding and crinkling"], "sample_ids": ["vZAw4apG0Es", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["background, tick, repeat", "paper, fold, crinkle"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a clock is ticking and people are talking", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of folding and crinkling paper?", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a toilet flushes and a female speaks"], "sample_ids": ["u--KhUW8l1Y", "yaln9y8I7ms"], "start_seconds": ["0", "230"], "properties": ["horn, siren, life", "female, flushes, toilet"], 
"captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage is blurry and out of focus"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a toilet flushes and a man speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["a infant makes noise and is excited", "paper folding and crinkling"], "sample_ids": ["wIJK3-5y0kA", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["noise, excited, infant", "paper, fold, crinkle"], "captions_pred_video": ["of a baby playing with a cat in a dark room", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a baby cries and a woman speaks", "the wind blows and a mouse clicks "], "question": "which entity is not a person", "label": 1}, {"captions": ["children cry and people talk", "pigeons vocalize and birds chirp"], "sample_ids": ["xLwHe825Zs", "uiS58TNyUiw"], "start_seconds": ["18", "430"], "properties": ["people talk, children cry, people talk", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking and a bee is buzzing"], "question": "which 
entity is a bird", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a train horn blows as it passes by"], "sample_ids": ["uRExseg-0XI", "zVacuqSb4LI"], "start_seconds": ["210", "30"], "properties": ["woman, man, water", "horn, blows, train"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a train?", "label": 1}, {"captions": ["birds fly and flutter around", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["wGKgwOP3h30", "xKB8O8LTs6s"], "start_seconds": ["30", "70"], "properties": ["fly, flutter, around", "music, gunfire, explosion"], "captions_pred_video": ["of the pigeons in the coop", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["pigeons coo and flap their wings", "music plays while a woman speaks and gunshots are fired "], "question": "which entity is more active", "label": 1}, {"captions": ["a young woman speaks over 
spraying and another person yells", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["uYT5gxnyMWM", "zFjIWfSD-4"], "start_seconds": ["50", "410"], "properties": ["person, spray, yell", "People, motor, brakes"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a person speaking over spraying and another person yelling?", "label": 0}, {"captions": ["a vehicle engine runs while a siren and horn sound", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["u--KhUW8l1Y", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["engine, sound, horn", "airplane, boy, fly"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "a propeller rotates loudly and intensely"], "sample_ids": ["wSVhSdj0F0", "ugHJF0hfYkg"], "start_seconds": ["10", "10"], "properties": ["horn honks, keys jingle, electronic beep", "loud, intense, propeller"], "captions_pred_video": [null, "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["wind loudly blowing while people speak in the background followed by a horn blowing", "people speak as gunfire rings out"], "sample_ids": ["xjhAnI2q6hM", "wqTCwqVRDlk"], "start_seconds": ["6", "80"], "properties": ["wind, 
blow, loudly", "gunfire, ring, speak"], "captions_pred_video": ["a school bus decorated with christmas lights is floating in the water", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a truck is revving its engine and a man is speaking ", "a man is speaking and a gun is fired"], "question": "which entity is more violent", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "a toilet flushes and a female speaks"], "sample_ids": ["sOa7g-44Dag", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["background, man, spray", "female, flushes, toilet"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "the revving of an engine throttle followed by a man speaking"], "sample_ids": ["wTjoRj1se3U", "tezvROoo4bs"], "start_seconds": ["390", "40"], "properties": ["engine, run, people", "audio, throttle, speaking"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of a busy city street with cars parked on both sides of the road"], "captions_pred_audio": ["a jet engine is running and people are talking", "a car accelerates and revs while a man speaks "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "white noise and snoring with some rustling in the background"], "sample_ids": ["xKB8O8LTs6s", "xzKKf9bKNUo"], "start_seconds": ["70", "10"], "properties": ["music, radio, gunshots", "background, noise, snoring"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "shows a woman laying on a 
bed with her eyes closed and her mouth open"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a person snoring loudly"], "question": "which entity has a woman speaking on a radio?", "label": 0}, {"captions": ["low humming with a clock ticking and birds chirping", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["yVumC9TGknc", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["humming, clock, birds", "male, duck, laugh"], "captions_pred_video": ["game title screen of the game shadow of the colossus on sony playstation 2", null], "captions_pred_audio": ["a series of beeps and chirps", "a man is speaking and ducks are quacking"], "question": "which entity has a clock ticking?", "label": 0}, {"captions": ["an adult woman speaks over chopping and silverware noises", "a child speaks in closed space"], "sample_ids": ["yYJksgsxx5U", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["audio, woman, silverware", "child, space, speak"], "captions_pred_video": ["of a woman slicing an orange on a cutting board", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a woman is speaking and dishes are clanging in the background ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a child speaking?", "label": 1}, {"captions": ["children speak as a female ask them questions", "water flows as men speak and yell"], "sample_ids": ["wEBlkGWVWwE", "vJ7JPEFhyLA"], "start_seconds": ["260", "16"], "properties": ["female, speak, questions", "water, flow, men"], "captions_pred_video": ["shows a person writing on the whiteboard", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more calm", "label": 0}, 
{"captions": ["two frogs croak at each other", "a man speaks as a car is passing by"], "sample_ids": ["zg0X6BnhOLQ", "sK4u5T8hW78"], "start_seconds": ["410", "30"], "properties": ["two frogs, croak, at each other", "a, car, pass"], "captions_pred_video": ["footage of lightning in the sky at night", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a frog is croaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["wind blows as people chatter quietly", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["xBxDz0CFVn0", "zl9Dqx-j7q4"], "start_seconds": ["30", "6"], "properties": ["wind, chatter, people", "engine, laugh, loud"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows as people chatter quietly", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["xBxDz0CFVn0", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["wind, chatter, people", "applause, audience, yells"], "captions_pred_video": ["footage is blurry and out of focus", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a 
horn honks and then loudly blares", "multiple people speak and children yell while water gurgles"], "sample_ids": ["wnpJndXuxLc", "vb1fPSDI4c"], "start_seconds": ["50", "30"], "properties": ["horn, honk, loud", "multiple, people, yell"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a crowd of people are talking and laughing"], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks then rubs two hard objects together", "water splashes as an animal walks through"], "sample_ids": ["yJ0TePmaOo", "w1ir-sZ3Im8"], "start_seconds": ["390", "90"], "properties": ["two hard objects, man, speak", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking with background noise and filing sounds ", "water splashes and gurgles as people speak"], "question": "which entity is more likely to cause water to splash", "label": 1}, {"captions": ["a train horn blows as it passes by", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["zVacuqSb4LI", "su6FAOcOA8c"], "start_seconds": ["30", "4"], "properties": ["horn, blows, train", "engine, idle, woman"], "captions_pred_video": ["by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong 
a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a train whistle blows and a train passes by with a whistle blowing ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["rain falls on a surface as men speak and thunder roars", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["w0xsN8X18Y", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["rain, thunder, surface", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["an aircraft engine runs as wind blows heavily", "a woman and man speak while food is frying"], "sample_ids": ["xjvTpk2Zpr8", "zk-xJGQU8-4"], "start_seconds": ["70", "130"], "properties": ["engine, run, wind", "food, man, woman"], "captions_pred_video": ["footage of a dhl plane landing on the runway", "a man and a woman cooking in a wok on the stove"], "captions_pred_audio": ["a jet engine roars and wind blows ", "a woman is speaking while dishes are clanging and music is playing in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "a clock ticktocks"], "sample_ids": ["tDlysoZiA1I", "v-g-j2uTByM"], "start_seconds": ["0", "30"], "properties": ["animal, grunts, chirps", "ticktocks, clock, ticktocks"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["birds are chirping and a rooster is crowing 
", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["bees buzz and wind blows", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["tMJne1a4AFI", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["bees buzz, wind blows, bees", "female, spraying, scream"], "captions_pred_video": ["a swarm of bees on the ground", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a swarm of bees buzzing around", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "a man woman speak while crickets sing"], "sample_ids": ["s4Uz1Ffgo04", "zTLVJCo4WEE"], "start_seconds": ["100", "30"], "properties": ["roars, background, people speaking", "a, crickets, sing"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "- a boy with a rifle aiming at a target"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a woman speaks and crickets chirp"], "question": "which entity is quieter", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "continuous sizzling with a woman speaking towards the end"], "sample_ids": ["zofjfKhqLk8", "ukxt9I7eMMg"], "start_seconds": ["10", "30"], "properties": ["background, metal, clank", "continuous, woman, speaking"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "footage of a person preparing food on a stool in a kitchen"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a woman is speaking while food is frying in the background "], "question": "which entity is a video of a woman speaking?", "label": 1}, {"captions": ["a church bell rings several times", "some tunes played by whistling"], "sample_ids": 
["sUVVjE3Ucp8", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["ring, bell, several", "tune, play, whistling"], "captions_pred_video": ["the video shows a stone wall with a clock on top of it and a bench in front of it", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a church bell is ringing ", "a person whistling a song"], "question": "which is a musical instrument", "label": 1}, {"captions": ["water flows as a woman laughs and a man speaks", "people speak as gunfire rings out"], "sample_ids": ["vddP56-ogds", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["water, flow, laugh", "gunfire, ring, speak"], "captions_pred_video": [null, "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking and a gun is fired"], "question": "which entity is more active", "label": 1}, {"captions": ["a person is burping while a girl speaks", "plastic is tapped on while someone speaks"], "sample_ids": ["vdoxuJn9lTc", "wvKpEYswXO0"], "start_seconds": ["40", "150"], "properties": ["person, burp, girl", "plastic, tap, speak"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", "of the person preparing food in the kitchen"], "captions_pred_audio": ["a child speaks followed by a burp", "a woman is speaking and tapping with background noise and water running "], "question": "which entity is tapped on while someone speaks", "label": 1}, {"captions": ["birds chirp and pigeons vocalize while walking around", "a man speaks followed by another man speaking outside"], "sample_ids": ["wIvYjuR3nrg", "viuTg1M-dqg"], "start_seconds": ["9", "30"], "properties": ["birds, pigeons, vocalize", "two men, speak, follow"], "captions_pred_video": ["footage of a pigeon sitting on a roof with trees in the background", "footage of water coming out of a hole in the ground"], "captions_pred_audio": 
["birds are chirping and cooing", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a group of animals", "label": 0}, {"captions": ["a man speaks in the background while a slow tick repeats", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["vZAw4apG0Es", "tdWhHV3X25Q"], "start_seconds": ["30", "60"], "properties": ["background, tick, repeat", "applause, audience, yells"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a clock is ticking and people are talking", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["crowd applause while a guy laughs followed by another man speaking", "winds blows roughly as a vehicle races past"], "sample_ids": ["tDlfY3nmx1A", "xjvTpk2Zpr8"], "start_seconds": ["160", "70"], "properties": ["applause, laugh, man", "wind, blows, vehicle"], "captions_pred_video": ["a man in a suit and tie is talking to another man in a suit and tie", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["a crowd is clapping and laughing and a man is speaking ", "a jet engine roars and wind blows "], "question": "which entity is a vehicle racing past?", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a stream of water flows as people talk and wind blows"], "sample_ids": ["uRExseg-0XI", "xBxDz0CFVn0"], "start_seconds": ["210", "30"], "properties": ["woman, man, water", "stream, water, flow"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "a man is speaking with wind noise in the background "], "question": 
"which entity has more water", "label": 1}, {"captions": ["a heavy rain falls endlessly", "an airplane engine runs"], "sample_ids": ["wP8ZKrlx3oA", "yVPZ2MNWpms"], "start_seconds": ["40", "0"], "properties": ["heavy, rain, fall", "engine, airplane, runs"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a car is driving by on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a sleeping person emits a gravely snore", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["w2JXXIAdUdg", "tdWhHV3X25Q"], "start_seconds": ["10", "60"], "properties": ["emits, sleeping, person", "applause, audience, yells"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["someone is snoring while sleeping", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["ujMt0-D-x2k", "zFjIWfSD-4"], "start_seconds": ["0", "410"], "properties": ["snore, sleep, someone", "People, motor, brakes"], "captions_pred_video": ["of the dog playing with a toy on the floor", null], "captions_pred_audio": ["a person is snoring loudly", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a person", "label": 1}, {"captions": ["a crowd yells, reacts and applauds", "a car accelerates and wind blows"], "sample_ids": ["wztCSUxOf8", "u0TrcHhkPQ"], "start_seconds": ["130", "20"], "properties": ["a crowd, yells, applauds", "accelerates, wind, blows"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is 
speaking and a crowd is clapping", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["wind blowing followed by a zoom", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vr8ZXjEBhMQ", "tDVADusiIoc"], "start_seconds": ["150", "60"], "properties": ["wind, blow, zoom", "water, radio, man"], "captions_pred_video": ["is taken from a motorcycle's point of view", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about a man speaking over a radio as wind blows and water splashes?", "label": 1}, {"captions": ["several insects fly while two men talk", "a door opens and closes"], "sample_ids": ["s-T9OVOiMLo", "vBHyYJ8pL0"], "start_seconds": ["330", "2"], "properties": ["several, fly, men", "open, close, door"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", null], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "mechanisms are ticking and a sliding door is opening and closing "], "question": "which entity is more passive", "label": 1}, {"captions": ["a steam engine runs and whistles as it passes by", "people cheer as a vehicle engine revs"], "sample_ids": ["se87d6yxEOA", "xjhAnI2q6hM"], "start_seconds": ["10", "6"], "properties": ["run, whistle, pass", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a train passing by a train station with smoke billowing out of the train's smokestack", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a train is moving and blowing its whistle ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a toilet flushes and water drains", "people 
cheer as a vehicle engine revs"], "sample_ids": ["sfAvvZwdLCY", "xjhAnI2q6hM"], "start_seconds": ["20", "6"], "properties": ["water drains, flushes, water", "engine revs, vehicle, people"], "captions_pred_video": ["footage of the toilet in the bathroom", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a toilet is flushed", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["dishes cling together then a man begins to speak", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sQGXqGcwOTc", "su6FAOcOA8c"], "start_seconds": ["3", "4"], "properties": ["cling, speak, dishes", "engine, idle, woman"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a woman is speaking and a subway train is moving "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["an emergency siren wails as it passes", "paper is crumpling consistently"], "sample_ids": ["vGj1XLJvNrw", "v5cSxLaHADY"], "start_seconds": ["0", "0"], "properties": ["wails, wails, pass", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of a police car driving down a city street", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["an emergency vehicle with a siren is passing by", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a person snoring several times", "a horn rings out as a machine runs by"], "sample_ids": ["spJCm8tD9Zo", "slZLHwNbbt4"], "start_seconds": ["90", "300"], "properties": ["snore, person, several", "a, horn, run"], "captions_pred_video": ["of a man laying on the ground with his mouth open", "footage of a train coming down the tracks on a sunny 
day"], "captions_pred_audio": ["a person is snoring loudly", "a train is moving and blowing its horn with a clickety-clack sound "], "question": "which entity is not a person?", "label": 1}, {"captions": ["an engine runs and wind blows", "an aircraft engine runs"], "sample_ids": ["vs65y4qmyBE", "yLCORCnd35Q"], "start_seconds": ["340", "0"], "properties": ["engine, run, wind", "engine, aircraft, runs"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a lufthansa airbus a380 landing at london's heathrow airport"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a train is moving and its wheels are squealing "], "question": "which entity is running", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a child speaks in closed space"], "sample_ids": ["w34HjHr6gAY", "yW6FWLSLkx4"], "start_seconds": ["30", "40"], "properties": ["beeps, hit, woman", "child, space, speak"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is a child speaking in a closed space?", "label": 1}, {"captions": ["a man speaks as a machine runs", "a man talks as several small engines run"], "sample_ids": ["vD6lYD1l0BY", "u9A6VZQCZpU"], "start_seconds": ["330", "30"], "properties": ["a, machine, 
run", "a, man, talk"], "captions_pred_video": ["game controller being held in the hands of the person", null], "captions_pred_audio": ["a man is speaking and dishes are being washed ", "a man is speaking while a race car is revving and accelerating "], "question": "which entity has a man speaking as a machine runs?", "label": 0}, {"captions": ["a woman speaks and food sizzles while frying", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wTideSjRFS0", "vfYTJq7nU"], "start_seconds": ["30", "130"], "properties": ["food, sizzle, woman", "rustling, ducks, quack"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", null], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a duck quacks and a woman speaks"], "question": "which entity is about a woman speaking and food sizzling while frying?", "label": 0}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "birds chirp and objects are moved around"], "sample_ids": ["wy1eKjR7KC0", "yPUYU6t3rwo"], "start_seconds": ["30", "370"], "properties": ["people, talk, distance", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["two police officers riding motorcycles down the street", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a man is speaking and a siren is going off", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["soft movement is accompanied by clocks ticking in the background", "a man speaks over intermittent keyboard taps"], "sample_ids": ["vlJS7LN2XyM", "tw76HGONaKg"], "start_seconds": ["30", "570"], "properties": ["background, clocks, ticking", "audio, man, keyboard"], "captions_pred_video": ["of the clock in the video is blurry and hard to make out the numbers on the face of the 
clock are hard to see due to the blurriness of the video", "game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda"], "captions_pred_audio": ["a ticktock of a clock", "a man speaks and types on a computer keyboard "], "question": "which entity has a man speaking over intermittent keyboard taps?", "label": 1}, {"captions": ["a man speaks then multiple motorcycles pass by", "vehicles pass by on a roadway"], "sample_ids": ["zcDwZ6W7E3E", "tgbONvsP47Y"], "start_seconds": ["180", "0"], "properties": ["a, man, speak", "pass, vehicle, roadway"], "captions_pred_video": ["2 people riding motorcycles down a mountain road with trees lining the sides of the road", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking while a car accelerates and revs its engine ", "a car is driving on the road "], "question": "which entity has more vehicles", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "women speak and laugh as wind blows"], "sample_ids": ["y1saVTXsKwc", "un9VQlzgZM"], "start_seconds": ["80", "5"], "properties": ["a, dog, talk", "wind, speak, laugh"], "captions_pred_video": ["a dog playing with a pink ball", null], "captions_pred_audio": ["a dog barks and a man speaks", 
"a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is about a woman talking to a dog?", "label": 0}, {"captions": ["continuous sizzling with a woman speaking towards the end", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["ukxt9I7eMMg", "sLUnaPT5gM8"], "start_seconds": ["30", "0"], "properties": ["continuous, woman, speaking", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a person preparing food on a stool in a kitchen", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a woman is speaking while food is frying in the background ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more like a scream", "label": 1}, {"captions": ["wind blows as people chatter quietly", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["xBxDz0CFVn0", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["wind, chatter, people", "motor noise, horn, siren"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is louder", "label": 1}, {"captions": ["a person is snoring while sleeping", "some tunes played by whistling"], "sample_ids": ["vJrjSeP17yE", "u6BnG6YZqJ4"], "start_seconds": ["40", "0"], "properties": ["a person is sleeping, snoring, person", "tune, play, whistling"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a person snoring loudly", "a person whistling a song"], "question": "which entity is playing tunes", "label": 1}, {"captions": ["a man is filing a hard object", "people cheer as a vehicle 
engine revs"], "sample_ids": ["vveS8HT7Uog", "xjhAnI2q6hM"], "start_seconds": ["100", "6"], "properties": ["a man, hard, object", "engine revs, vehicle, people"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a truck is revving its engine and a man is speaking "], "question": "which object is harder to file", "label": 0}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["zofjfKhqLk8", "vfYTJq7nU"], "start_seconds": ["10", "130"], "properties": ["background, metal, clank", "rustling, ducks, quack"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", null], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a duck quacks and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["dog barking and vehicle engine idling followed shortly by vehicle engine revving", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["zY3icUyMdh8", "su6FAOcOA8c"], "start_seconds": ["20", "4"], "properties": ["dog, bark, engine", "engine, idle, woman"], "captions_pred_video": ["footage of a bus driving through a residential street at night", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a car is driving and dogs are barking and squealing ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a race car approaches quickly and slows down squealing tires", "some tunes played by whistling"], "sample_ids": ["sEprKHm8Sj8", 
"u6BnG6YZqJ4"], "start_seconds": ["90", "0"], "properties": ["car, tires, slows", "tune, play, whistling"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a person whistling a song"], "question": "which entity is not a car?", "label": 1}, {"captions": ["water is sprayed across a hard surface", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sQwlkXjQabo", "vJ7JPEFhyLA"], "start_seconds": ["10", "16"], "properties": ["water, spray, surface", "three men, wind, flow"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a video of a liquid flowing?", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "water flows and trickles"], "sample_ids": ["zl9Dqx-j7q4", "tB7hWb9gTuQ"], "start_seconds": ["6", "30"], "properties": ["motors rev, laugh, loudly", "water, flow, trickle"], "captions_pred_video": ["footage of a man driving a car in the dark", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a jet engine roars ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["two men and a woman talk while wind blows and birds tweet", "food is frying then a woman speaks"], "sample_ids": ["wqZ135Ssz0", "ukxt9I7eMMg"], "start_seconds": ["60", "30"], "properties": ["two men, woman, birds", "food, woman, speak"], 
"captions_pred_video": [null, "footage of a person preparing food on a stool in a kitchen"], "captions_pred_audio": ["a man is speaking and ducks are quacking with wind noise in the background ", "a woman is speaking while food is frying in the background "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["wRV8yMk886E", "tdWhHV3X25Q"], "start_seconds": ["0", "60"], "properties": ["liquid, spray, nozzle", "applause, audience, yells"], "captions_pred_video": ["two cars are parked in a parking lot at night", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["an infant crying frantically", "water flows and trickles"], "sample_ids": ["zwOBqeFTgiU", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["cry, infant, frantically", "water, flow, trickle"], "captions_pred_video": ["of the baby crying in the car seat", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a baby cries loudly", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an engine works in idle nearby followed by a man talking", "vehicles pass by on a roadway"], "sample_ids": ["wqADXCzngMw", "tgbONvsP47Y"], "start_seconds": ["340", "0"], "properties": ["engine, idle, man", "pass, vehicle, roadway"], "captions_pred_video": ["of a man working on a vintage volkswagen beetle", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a lawn mower is running and a man is speaking ", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male 
voice", "people applaud and hoot and chat quietly"], "sample_ids": ["vK93VuO0yNc", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["male voice, bus, rumble", "people, applaud, hoot"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", null], "captions_pred_audio": ["a car drives by with wind noise in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["an adult speaks and is typing on a computer keyboard", "a clock ticktocks"], "sample_ids": ["x9JovgqUcs", "v-g-j2uTByM"], "start_seconds": ["500", "30"], "properties": ["An adult is speaking, typing, and using a computer keyboard", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "dishes cling together then a man begins to speak"], "sample_ids": ["sG7TyPnFDR0", "sQGXqGcwOTc"], "start_seconds": ["180", "3"], "properties": ["beeps, machine, smoke alarm", "cling, speak, dishes"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "mechanisms are operating and water is splashing "], "question": "which entity has a man speaking while a machine runs?", "label": 0}, {"captions": ["water drips and bubbles as a man speaks", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["vSeGhaZt-aI", "zl9Dqx-j7q4"], "start_seconds": ["50", "6"], "properties": ["water, bubbles, speak", "engine, laugh, loud"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "footage of a 
man driving a car in the dark"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a horn blows as a train chugs along and warning bells ring", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["ukg5L09Wpvo", "tiDFTC-5vU"], "start_seconds": ["150", "30"], "properties": ["a train, a horn, a bell", "male, duck, laugh"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", null], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "a man is speaking and ducks are quacking"], "question": "which entity has a duck?", "label": 1}, {"captions": ["a motor runs and stops, and animals squawk and croak", "someone is typing on a computer keyboard"], "sample_ids": ["s4tUs779vBA", "v0x1odnXtP0"], "start_seconds": ["160", "210"], "properties": ["a, sound, stop", "keyboard, type, computer"], "captions_pred_video": ["game in 10 words or less - screenshot 1", "how to make money on youtube in spanish"], "captions_pred_audio": ["a car is revving and a man is speaking ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["an aircraft engine runs", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["yLCORCnd35Q", "uYT5gxnyMWM"], "start_seconds": ["0", "50"], "properties": ["engine, aircraft, runs", "female, spraying, scream"], "captions_pred_video": ["a lufthansa airbus a380 landing at london's heathrow airport", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a train is moving and its wheels are squealing ", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "wind blows as people chatter quietly"], "sample_ids": ["w0xsN8X18Y", "xBxDz0CFVn0"], 
"start_seconds": ["30", "30"], "properties": ["music, surface, rain", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "male speech with light ticking"], "sample_ids": ["s3cTDAj31g", "xO-Q2BlIIPU"], "start_seconds": ["80", "30"], "properties": ["man, talk, woman", "male, speech, ticking"], "captions_pred_video": [null, "a clock with a green glowing display showing the time 09 07 2016 12 31 2016"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a speech?", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "a sheep baa followed by birds chirping and then more sheep baaing"], "sample_ids": ["x5cuQjOdM3E", "vlS6YMeWAPo"], "start_seconds": ["30", "40"], "properties": ["cat, talk, meow", "sheep, baa, birds"], "captions_pred_video": ["a black background with an airplane flying in the sky", "footage of a goat in a pen behind a wooden fence"], "captions_pred_audio": ["a cat meows and a woman speaks", "a goat bleats and birds chirp"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a vehicle engine accelerates and wind blows", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["wudZTNBtVqc", "vfYTJq7nU"], "start_seconds": ["60", "130"], "properties": ["accelerates, engine, wind", "rustling, ducks, quack"], "captions_pred_video": ["footage is of a parking lot with cars parked in it", null], "captions_pred_audio": ["a car accelerates and revs its engine ", "a duck quacks and a woman speaks"], 
"question": "which entity is a video of a vehicle?", "label": 0}, {"captions": ["a small voice speaks, music plays followed by a double whoosh, and then a bell dings", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["tQWGZLItBXk", "wDVMhEdTiVw"], "start_seconds": ["170", "30"], "properties": ["voice, music, whoosh", "gun, shoot, water"], "captions_pred_video": ["worms revolution screenshots", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a movie", "label": 1}, {"captions": ["a beep repeats multiple times", "someone is typing on a computer keyboard"], "sample_ids": ["y682ml90jGw", "v0x1odnXtP0"], "start_seconds": ["11", "210"], "properties": ["beep, repeat, multiple", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a beeping sound is being made ", "a person is typing on a keyboard"], "question": "which is not a type of computer", "label": 0}, {"captions": ["a low rumbling in the distance followed by a motorcycle engine revving up", "a propeller rotates loudly and intensely"], "sample_ids": ["vr8ZXjEBhMQ", "ugHJF0hfYkg"], "start_seconds": ["150", "10"], "properties": ["sound, distance, engine", "loud, intense, propeller"], "captions_pred_video": ["is taken from a motorcycle's point of view", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a clock ticktocks"], "sample_ids": ["sZPuqDgX2V0", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["commentator, race, track", "ticktocks, clock, 
ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a dog barks and whimpers", "birds chirp and objects are moved around"], "sample_ids": ["sShpyu2l4YQ", "yPUYU6t3rwo"], "start_seconds": ["0", "370"], "properties": ["barks, whimpers, dog", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["the puppies are playing with a toy", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a dog is barking and growling", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a baby cries and a woman moans", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["smDKStoHBJo", "wqZ135Ssz0"], "start_seconds": ["0", "60"], "properties": ["a, cry, woman", "two men, woman, birds"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more people", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "people cheer as a vehicle engine revs"], "sample_ids": ["zj2R0XoFr5k", "xjhAnI2q6hM"], "start_seconds": ["50", "6"], "properties": ["airplane, fly, woman", "engine revs, vehicle, people"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a truck is revving its engine and a man is speaking "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["music plays and repeated slaps accompany 
human sniveling, then insect buzz", "a young woman speaks and laughs and an animal snorts"], "sample_ids": ["weDbePuc-Xc", "uEU-Hg5MTN8"], "start_seconds": ["40", "27"], "properties": ["music, slaps, human", "a woman, laughs, animal"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has a woman speaking and laughing and an animal snorts?", "label": 1}, {"captions": ["a man makes an exclamation, then another man speaks", "a bird chirps followed by a door bell ringing that causes a woman to gasp and the music plays"], "sample_ids": ["xO-Q2BlIIPU", "sU53zg9Jp7s"], "start_seconds": ["30", "380"], "properties": ["two men, exclamation, speak", "a bird chirps, a door bell ringing, a woman gasps"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", "a cartoon girl is standing in front of a blue couch"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "birds chirp and a doorbell rings with breathing and music in the background "], "question": "which entity has a doorbell ringing?", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "a machine beeps continuously"], "sample_ids": ["uJV8NDaHqqk", "y682ml90jGw"], "start_seconds": ["100", "11"], "properties": ["loud, fly, chirp", "beeps, machine, continuously"], "captions_pred_video": ["a bee hive in a wooden box", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "wind blowing followed by a zoom"], "sample_ids": ["t25U-v4k4ts", "vr8ZXjEBhMQ"], "start_seconds": ["40", "150"], 
"properties": ["bees buzz, birds chirp, man speaks", "wind, blow, zoom"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "is taken from a motorcycle's point of view"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "wind blows and a chainsaw cuts through wood "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks and food sizzles while frying", "small dogs yip and bark sharply"], "sample_ids": ["wTideSjRFS0", "v-wcQf4BDY0"], "start_seconds": ["30", "120"], "properties": ["food, sizzle, woman", "bark, yip, sharply"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a dog barks and growls"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks in the background while a slow tick repeats", "some tunes played by whistling"], "sample_ids": ["vZAw4apG0Es", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["background, tick, repeat", "tune, play, whistling"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a clock is ticking and people are talking", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a man speaks as insects buzz and a bird chirps", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["t25U-v4k4ts", "y8WEcpOlT3I"], "start_seconds": ["40", "40"], "properties": ["a, chirps, bird", "harsh, wind, blows"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a man is speaking with wind noise in 
the background "], "question": "which entity is more active", "label": 1}, {"captions": ["music plays and someone speaks before gunfire and an explosion occurs", "a horn blasts as warning bells ring"], "sample_ids": ["xKB8O8LTs6s", "zgUgkpk78xU"], "start_seconds": ["70", "70"], "properties": ["music, gunfire, explosion", "horn, bells, ring"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "a train blows its horn as it speeds down the tracks "], "question": "which entity is a warning", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["yYEVLuqEytU", "xfaoyyzw2WU"], "start_seconds": ["40", "180"], "properties": ["grunt, slurp, background", "loud, jet engine, roar"], "captions_pred_video": ["a baby goat is being petted by a person's hand", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["several sheep bleat and a man speaks", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "someone is typing on a computer keyboard"], "sample_ids": ["w0xsN8X18Y", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["music, surface, rain", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a person is typing on a keyboard"], 
"question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a man sprays as a scraping occurs in the background", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["sOa7g-44Dag", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["background, man, spray", "People, motor, brakes"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", null], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has a motor running?", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "a man speaks as a car is passing by"], "sample_ids": ["zgUgkpk78xU", "sK4u5T8hW78"], "start_seconds": ["70", "30"], "properties": ["horn, bells, ring", "a, car, pass"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a warning", "label": 0}, {"captions": ["a door slams shut roughly", "dishes cling together then a man begins to speak"], "sample_ids": ["zkKdxzNC97Y", "sQGXqGcwOTc"], 
"start_seconds": ["27", "3"], "properties": ["a door, slams, shut", "cling, speak, dishes"], "captions_pred_video": ["footage of the door opening and closing in slow motion", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a door is opened and closed", "mechanisms are operating and water is splashing "], "question": "which entity is more likely to be a door", "label": 0}, {"captions": ["water running down a sink while a man is talking", "a woman speaks and other women and a man talk with her"], "sample_ids": ["vSeGhaZt-aI", "vbpKkWvfOu4"], "start_seconds": ["50", "560"], "properties": ["water, sink, talk", "a, woman, man"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a woman is speaking and a man is speaking"], "question": "which entity has a man talking to a sink?", "label": 0}, {"captions": ["a woman speaks and food sizzles while frying", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["wTideSjRFS0", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["food, sizzle, woman", "animal, grunts, snorts"], "captions_pred_video": ["footage of a woman cooking in a kitchen with a microwave oven", "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a woman is speaking while water is running in the background", "a woman is speaking and a baby is crying"], "question": "which entity is about a woman speaking and food sizzling while frying?", "label": 0}, {"captions": ["a woman speaks as she rubs two objects together", "someone whistles a tune"], "sample_ids": ["vzxHnu-SFEw", "sIXTftIuUgw"], "start_seconds": ["80", 
"90"], "properties": ["two objects, woman, speak", "someone, tune, whistle"], "captions_pred_video": ["how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to", null], "captions_pred_audio": ["a woman is speaking and breathing with mechanisms in the background ", "a person whistling a song"], "question": "which is not a musical instrument", "label": 0}, {"captions": ["plastic is tapped on while someone speaks", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wvKpEYswXO0", "tDVADusiIoc"], "start_seconds": ["150", "60"], "properties": ["plastic, tap, speak", "water, radio, man"], "captions_pred_video": ["of the person preparing food in the kitchen", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a toilet flushes and water drains"], "sample_ids": ["sG7TyPnFDR0", "sfAvvZwdLCY"], "start_seconds": 
["180", "20"], "properties": ["beeps, machine, smoke alarm", "water drains, flushes, water"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", "footage of the toilet in the bathroom"], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a toilet is flushed"], "question": "which entity is a source of water", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "an airplane engine runs"], "sample_ids": ["zj2R0XoFr5k", "yVPZ2MNWpms"], "start_seconds": ["50", "0"], "properties": ["airplane, boy, fly", "engine, airplane, runs"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a car is driving by on the road "], "question": "which airplane is flying", "label": 0}, {"captions": ["here comes the train and it starts to blow the horn and get close", "a dog barks and whimpers"], "sample_ids": ["s7knHCFW82w", "sShpyu2l4YQ"], "start_seconds": ["30", "0"], "properties": ["blow horn, get close, train", "barks, whimpers, dog"], "captions_pred_video": ["footage of the train on the tracks near a building and a car parked on the side of the road", "the puppies are playing with a toy"], "captions_pred_audio": ["a train is blowing its horn and its wheels are squealing ", "a dog is barking and growling"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a person snores hilariously while someone laughs", "water splashes as an animal walks through"], "sample_ids": ["sSMl2vc3ek", "w1ir-sZ3Im8"], "start_seconds": ["20", "90"], "properties": ["a person, laughs, snores", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a person snoring loudly", "water splashes and 
gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "water flows as men speak and yell"], "sample_ids": ["w0xsN8X18Y", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["music, surface, rain", "water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["speaking following by laughing and clapping", "after a few seconds of silence, a loud bang occurs followed by a softer banging noise"], "sample_ids": ["u2f5NpsoHBg", "zkKdxzNC97Y"], "start_seconds": ["30", "27"], "properties": ["person, laugh, clap", "loud, bang, noise"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a door is opened and closed"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a weapon fires multiple times", "pigeons vocalize and birds chirp"], "sample_ids": ["sMC07Ucy7kg", "uiS58TNyUiw"], "start_seconds": ["10", "430"], "properties": ["weapon, fire, multiple", "vocalize, bird, chirp"], "captions_pred_video": ["footage is from a car's point of view", "of the pigeon in the cage"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking and a bee is buzzing"], "question": "which entity is not a weapon", "label": 1}, {"captions": ["a motorcycle engine revs then accelerates before hitting a bump", "paper is crumpling consistently"], "sample_ids": ["w-4gHptFNuU", "v5cSxLaHADY"], "start_seconds": ["21", "0"], "properties": ["engine revs, accelerates, bump", "paper is 
crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["is taken from a motorcycle rider's point of view", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a car accelerates and revs its engine ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "a woman speaks happily and an animal chirps"], "sample_ids": ["uYT5gxnyMWM", "uWAAAL4CIoc"], "start_seconds": ["50", "0"], "properties": ["a, scream, girl", "a woman, chirps, animal"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a woman is speaking and a dog is barking "], "question": "which entity is more calm", "label": 1}, {"captions": ["a woman talking as an infant is crying", "a car accelerates and wind blows"], "sample_ids": ["tMbMDvT50j8", "u0TrcHhkPQ"], "start_seconds": ["12", "20"], "properties": ["a, talk, infant", "accelerates, wind, blows"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a race car accelerates and revs its engine "], "question": "which is not a person", "label": 1}, {"captions": ["some people speak", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["vbZ-0lGPneg", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["some people speak English, some people speak Spanish, some people speak French", "female, spraying, scream"], "captions_pred_video": ["of a man holding a baby duck in his hands", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a woman is speaking and a baby is crying"], "question": "which entity is more violent", "label": 1}, {"captions": ["a weapon fires multiple times", 
"an insect buzzes around continuously"], "sample_ids": ["sMC07Ucy7kg", "v25l1jef3JY"], "start_seconds": ["10", "0"], "properties": ["weapon, fire, multiple", "buzzes, continuously, insect"], "captions_pred_video": ["footage is from a car's point of view", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a fly is buzzing around a microphone "], "question": "which entity is not a weapon", "label": 1}, {"captions": ["white noise and snoring with some rustling in the background", "children speak and play together"], "sample_ids": ["xzKKf9bKNUo", "yVVP8XvWJTo"], "start_seconds": ["10", "260"], "properties": ["background, noise, snoring", "children, speak, play"], "captions_pred_video": ["shows a woman laying on a bed with her eyes closed and her mouth open", "footage of a playground at a school or daycare center"], "captions_pred_audio": ["a person snoring loudly", "children are speaking and breathing with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "a person uses a saw to cut some wood"], "sample_ids": ["soTOh3zYJfY", "sHbXC6na9hg"], "start_seconds": ["40", "0"], "properties": ["vehicle, skid, tires", "a person, saw, wood"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", "a man using a tractor to cut a log into firewood youtube"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "an engine is idling and vibrating"], "question": "which entity is stationary", "label": 1}, {"captions": ["a cat meows and children speak", "birds chirp and wind blows"], "sample_ids": ["x5cuQjOdM3E", "sxIvBMSavMQ"], "start_seconds": ["30", "210"], "properties": ["cat, speak, children", "birds, chirp, wind"], "captions_pred_video": ["a black background with an airplane flying in the sky", "beekeeping 101 how to extract honey from a 
beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a"], "captions_pred_audio": ["a cat meows and a woman speaks", "birds are chirping and insects are buzzing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["an engine runs loudly", "paper is crumpling consistently"], "sample_ids": ["vqZuVbG6-HI", "v5cSxLaHADY"], "start_seconds": ["130", "0"], "properties": ["loud, engine, run", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage is blurry because it's raining outside", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a lawn mower is running and men are speaking ", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a motor idles, accelerates, then slows down.", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["vYkA3cfXp5Q", "y8WEcpOlT3I"], "start_seconds": ["30", "40"], "properties": ["speed, idle, accelerate", "harsh, wind, blows"], "captions_pred_video": ["footage of a car driving down the street on a sunny day", "on how to use a sewing machine youtube"], "captions_pred_audio": ["an engine is idling", "a man is speaking with wind noise in the background "], "question": "which entity is a natural phenomenon", "label": 1}, {"captions": ["water splashes and a motorboat passes as people yell", "birds 
vocalize and chirp continuously"], "sample_ids": ["w5W5Kqtc8E", "w1mlz3Pe4fU"], "start_seconds": ["100", "300"], "properties": ["water, splashes, motorboat", "vocalize, chirp, continuously"], "captions_pred_video": [null, "of a bird in a cage"], "captions_pred_audio": ["a motorboat is moving and people are shouting and cheering ", "birds are chirping and singing"], "question": "which entity is quieter", "label": 1}, {"captions": ["three men talk while wind blows and some liquid flows", "dishes cling together then a man begins to speak"], "sample_ids": ["vJ7JPEFhyLA", "sQGXqGcwOTc"], "start_seconds": ["16", "3"], "properties": ["three men, wind, flow", "cling, speak, dishes"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 1}, {"captions": ["an insect buzzes around continuously", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["v25l1jef3JY", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["buzzes, continuously, insect", "engine, idle, woman"], "captions_pred_video": ["a black background with a cartoon character in the foreground", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a woman is speaking and a subway train is moving "], "question": "which entity is not a person?", "label": 0}, {"captions": ["birds chirp as a man speaks and a younger person speaks", "some men converse over an engine running"], "sample_ids": ["xl2PIWyXaM", "sCiy7QS1U"], "start_seconds": ["160", "300"], "properties": ["chirp, man, younger person", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and people are talking", "a man 
speaks while a motorcycle revs and accelerates "], "question": "which entity is about a man speaking to a younger person?", "label": 0}, {"captions": ["ticking continues without interruption", "a duck quacks continuously"], "sample_ids": ["v-g-j2uTByM", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["ticking, continuous, clock", "quacks, continuously, duck"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a clock is ticking loudly", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["birds chirp and objects are moved around", "a car accelerates and wind blows"], "sample_ids": ["yPUYU6t3rwo", "u0TrcHhkPQ"], "start_seconds": ["370", "20"], "properties": ["birds chirp, objects are moved around, birds", "accelerates, wind, blows"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", null], "captions_pred_audio": ["insects buzz and a man speaks", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["a man speaks with another voice speaking in the background", "water splashes as an animal walks through"], "sample_ids": ["u21-Z5gJCB8", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["background, voice, man", "animal, water, splashes"], "captions_pred_video": ["- a person cooking eggs in a pan on the stove", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["popping and crackling repeats as men yell and laugh", "water flows as men speak and yell"], "sample_ids": ["rqu8iB22IY", "vJ7JPEFhyLA"], "start_seconds": ["5", "16"], "properties": ["sound, repeats, laugh", 
"water, flow, men"], "captions_pred_video": [null, "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a dog barks and a man speaks while music plays ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about water flowing as men speak and yell?", "label": 1}, {"captions": ["a male is speaking and a duck quacks as others laugh", "an infant crying frantically"], "sample_ids": ["tiDFTC-5vU", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["male, duck, laugh", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking and ducks are quacking", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a clock ticktocks"], "sample_ids": ["w34HjHr6gAY", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["beeps, hit, woman", "ticktocks, clock, ticktocks"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a young woman speaks over spraying and another person yells", "someone whistles a tune"], "sample_ids": ["uYT5gxnyMWM", "sIXTftIuUgw"], "start_seconds": ["50", "90"], "properties": ["person, spray, yell", 
"someone, tune, whistle"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", null], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["several ducks are quacking and squawking", "wind blows as people chatter quietly"], "sample_ids": ["wfHeoPDLMaM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["quacking, squawking, ducks", "wind, chatter, people"], "captions_pred_video": ["ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire in the middle of the barn ducks in a barn with a fire", "footage is blurry and out of focus"], "captions_pred_audio": ["ducks are quacking", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a crowd yells, reacts and applauds"], "sample_ids": ["shmR4OZtzqA", "wztCSUxOf8"], "start_seconds": ["30", "130"], "properties": ["man, engine, idle", "a crowd, yells, applauds"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to 
replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", null], "captions_pred_audio": ["a man speaks while a motor runs", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a baby cries and a woman moans", "a person screams glaringly"], "sample_ids": ["smDKStoHBJo", "xC8kbrKJmco"], "start_seconds": ["0", "0"], "properties": ["a, cry, woman", "glaringly, screams, person"], "captions_pred_video": ["a man holding a crying baby in his arms", null], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a goat is bleating "], "question": "which entity is a person?", "label": 1}, {"captions": ["a man speaks over a running engine and blowing wind", "a machine beeps continuously"], "sample_ids": ["ylpYOorfH4o", "y682ml90jGw"], "start_seconds": ["410", "11"], "properties": ["engine, running, wind", "beeps, machine, continuously"], "captions_pred_video": ["for youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 youtube how to replace the fuel pump on a 1999 dodge ram 1500 
youtube how to replace the fuel pump on a 1999 dodge ram 15", null], "captions_pred_audio": ["a man is speaking and an engine is revving", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as a vehicle engine idles", "a clock ticktocks"], "sample_ids": ["shmR4OZtzqA", "v-g-j2uTByM"], "start_seconds": ["30", "30"], "properties": ["man, engine, idle", "ticktocks, clock, ticktocks"], "captions_pred_video": ["shows how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche youtube how to replace a fog light bulb on a 2005 chevrolet avalanche you", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a man speaks while a motor runs", "a clock is ticking loudly"], "question": "which entity is ticktocks", "label": 1}, {"captions": ["a man talks while vehicles pass by", "an airplane engine spools and people speak"], "sample_ids": ["sK4u5T8hW78", "wTjoRj1se3U"], "start_seconds": ["30", "390"], "properties": ["a, man, talk", "airplane, engine, spool"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 
hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a jet engine is running and people are talking"], "question": "which entity is a video of a man talking?", "label": 0}, {"captions": ["a man speaks while a machine runs before a smoke alarm beeps", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["sG7TyPnFDR0", "w5W5Kqtc8E"], "start_seconds": ["180", "100"], "properties": ["beeps, machine, smoke alarm", "wind, blow, vehicle"], "captions_pred_video": ["a person is using an espresso machine in a restaurant", null], "captions_pred_audio": ["a man is speaking and a microwave oven is beeping ", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["a power tool runs and touches a surface", "male speech with light ticking"], "sample_ids": ["zfvPRf3chY", "xO-Q2BlIIPU"], "start_seconds": ["290", "30"], "properties": ["power tool, run, touch", "male, speech, ticking"], "captions_pred_video": [null, "a clock with a green glowing display showing the time 09 07 2016 12 31 2016"], "captions_pred_audio": ["a man is speaking while a power tool is being used ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is not a power tool", "label": 1}, {"captions": ["a toilet flushes and water drains", "an engine works in idle nearby followed by a man talking"], "sample_ids": ["sfAvvZwdLCY", "wqADXCzngMw"], "start_seconds": ["20", "340"], "properties": ["water drains, flushes, water", "engine, idle, man"], "captions_pred_video": ["footage of the toilet in the bathroom", "of a man working on a vintage volkswagen beetle"], "captions_pred_audio": ["a toilet is flushed", "a lawn mower is running and a man is speaking "], "question": "which entity is a machine?", 
"label": 0}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "someone whistles a tune"], "sample_ids": ["w2JXXIAdUdg", "sIXTftIuUgw"], "start_seconds": ["10", "90"], "properties": ["snoring, distance, person", "someone, tune, whistle"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", null], "captions_pred_audio": ["a person snoring and a dog whimpering", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["some liquid flows while a woman laughs and man talks", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["vddP56-ogds", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["liquid, laughs, man", "male, duck, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a man is speaking and ducks are quacking"], "question": "which entity has a duck?", "label": 1}, {"captions": ["a man speaks as a scratching occurs", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sOa7g-44Dag", "wDVMhEdTiVw"], "start_seconds": ["30", "30"], "properties": ["audio, scratching, man", "gun, shoot, water"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["long loud burping by a man", "wind blows strongly"], "sample_ids": ["xmiUIOhtZyQ", "w8uLijTqtlU"], "start_seconds": ["60", "70"], "properties": ["loud, burp, man", "wind, blows, strongly"], "captions_pred_video": ["homer simpson drinking a beer", "footage is blurry and shaky"], "captions_pred_audio": ["a person burps and music plays in 
the background ", "the wind is blowing strongly"], "question": "which is louder", "label": 1}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "a man speaks as a car is passing by"], "sample_ids": ["w34HjHr6gAY", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["beeps, squawk, child speaking", "a, car, pass"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a man speaking to a car?", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "a telephone rings followed by a woman talking"], "sample_ids": ["w34HjHr6gAY", "tGcFnX0GHI"], "start_seconds": ["30", "0"], "properties": ["beeps, hit, woman", "ring, talk, woman"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard 
of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", null], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a dial tone sounds followed by a woman speaking"], "question": "which entity has a woman talking", "label": 1}, {"captions": ["a woman talking as an infant is crying", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["tMbMDvT50j8", "vfYTJq7nU"], "start_seconds": ["12", "130"], "properties": ["a, talk, infant", "rustling, ducks, quack"], "captions_pred_video": ["shows a little girl covering her face with her hands while sitting at a table", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a duck quacks and a woman speaks"], "question": "which entity is about a woman talking to an infant?", "label": 0}, {"captions": ["loud, continuous burping", "a clock ticktocks"], "sample_ids": ["y636gklDioE", "v-g-j2uTByM"], "start_seconds": ["20", "30"], "properties": ["loud, continuous, burping", "ticktocks, clock, ticktocks"], "captions_pred_video": ["a dog sitting on a red chair in front of an old telephone", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a person burps loudly several times", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a helicopter engine runs continuously", "a woman speaks and other women and a man talk with her"], "sample_ids": ["ugHJF0hfYkg", "vbpKkWvfOu4"], "start_seconds": ["10", "560"], "properties": ["engine, running, continuously", "a, woman, man"], "captions_pred_video": ["a man in a helicopter cockpit wearing headphones", "2012-07-20 17 30 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20"], "captions_pred_audio": ["a helicopter is flying overhead ", "a woman is speaking and a man is speaking"], "question": "which entity has more people", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "some tunes played by whistling"], "sample_ids": ["vuUVPzd2FXw", "u6BnG6YZqJ4"], "start_seconds": ["160", "0"], "properties": ["a, steam, release", "tune, play, whistling"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a large bell chimes back and forth loudly", "some tunes played by whistling"], "sample_ids": ["w2M4i1mklOA", "u6BnG6YZqJ4"], "start_seconds": ["30", "0"], "properties": ["loud, chime, bell", "tune, play, whistling"], "captions_pred_video": ["footage of an antique clock", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a clock is ticking and a bell is ringing ", "a person whistling a song"], "question": "which entity is played by whistling", "label": 1}, {"captions": ["a person is burping while a girl speaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["vdoxuJn9lTc", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["person, burp, girl", "People, motor, brakes"], "captions_pred_video": ["a group of young girls playing a video game together in a living room", null], "captions_pred_audio": ["a child speaks followed by a burp", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is about a person speaking to a girl?", 
"label": 0}, {"captions": ["rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown", "people cheer as a vehicle engine revs"], "sample_ids": ["vfYTJq7nU", "xjhAnI2q6hM"], "start_seconds": ["130", "6"], "properties": ["rustling, ducks, quack", "engine revs, vehicle, people"], "captions_pred_video": [null, "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a duck quacks and a woman speaks", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a boat travels through the waves as the wind blows loudly and a man speaks over a radio", "paper is crumpling consistently"], "sample_ids": ["tDVADusiIoc", "v5cSxLaHADY"], "start_seconds": ["60", "0"], "properties": ["wind, radio, waves", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a man speaks as a car is passing by", "dishes cling together then a man begins to speak"], "sample_ids": ["sK4u5T8hW78", "sQGXqGcwOTc"], "start_seconds": ["30", "3"], "properties": ["a, car, pass", "cling, speak, dishes"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a man washing dishes in a commercial kitchen"], 
"captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a man makes an exclamation, then another man speaks", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["xO-Q2BlIIPU", "zFjIWfSD-4"], "start_seconds": ["30", "410"], "properties": ["two men, exclamation, speak", "People, motor, brakes"], "captions_pred_video": ["a clock with a green glowing display showing the time 09 07 2016 12 31 2016", null], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity has more people", "label": 1}, {"captions": ["water flows as men speak and yell", "a telephone rings followed by a woman talking"], "sample_ids": ["vJ7JPEFhyLA", "tGcFnX0GHI"], "start_seconds": ["16", "0"], "properties": ["water, flow, men", "ring, talk, woman"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a dial tone sounds followed by a woman speaking"], "question": "which entity is a recording", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a clock ticktocks"], "sample_ids": ["uEU-Hg5MTN8", "v-g-j2uTByM"], "start_seconds": ["27", "30"], "properties": ["a woman, laughs, animal", "ticktocks, clock, ticktocks"], "captions_pred_video": ["of a girl wearing a pig mask and a boy hugging her", "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["an aircraft engine runs as people speak", "an airplane engine runs"], "sample_ids": ["wTjoRj1se3U", "yVPZ2MNWpms"], 
"start_seconds": ["390", "0"], "properties": ["engine, run, people", "engine, airplane, runs"], "captions_pred_video": ["footage of a man playing with a remote control airplane in a field", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a jet engine is running and people are talking", "a car is driving by on the road "], "question": "which entity has a running engine", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "a horn honks twice and keys jingle, followed by an electronic beep"], "sample_ids": ["uWAAAL4CIoc", "wSVhSdj0F0"], "start_seconds": ["0", "10"], "properties": ["a woman, chirps, animal", "horn honks, keys jingle, electronic beep"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a car horn honks and keys jangle with background noise "], "question": "which entity is more likely to be heard in a car", "label": 1}, {"captions": ["an audience gives applause", "a horn blasts as warning bells ring"], "sample_ids": ["x6iCUDmRpKQ", "zgUgkpk78xU"], "start_seconds": ["38", "70"], "properties": ["applause, audience, give", "horn, bells, ring"], "captions_pred_video": ["a black background with the moon and stars in the sky", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a group of people are clapping and cheering", "a train blows its horn as it speeds down the tracks "], "question": "which is a warning", "label": 1}, {"captions": ["water drips and bubbles as a man speaks", "dishes cling together then a man begins to speak"], "sample_ids": ["vSeGhaZt-aI", "sQGXqGcwOTc"], "start_seconds": ["50", "3"], "properties": ["water, bubbles, speak", "cling, speak, 
dishes"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["water rushes and then a vehicle zooms past", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["s4Uz1Ffgo04", "xfaoyyzw2WU"], "start_seconds": ["100", "180"], "properties": ["water, rushes, vehicle", "loud, jet engine, roar"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["men speak and a nozzle sprays liquid", "a horn blasts as warning bells ring"], "sample_ids": ["wRV8yMk886E", "zgUgkpk78xU"], "start_seconds": ["0", "70"], "properties": ["liquid, spray, nozzle", "horn, bells, ring"], "captions_pred_video": ["two cars are parked in a parking lot at night", "of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108"], "captions_pred_audio": ["a man speaks followed by a loud burst", "a train blows its horn as it speeds down the tracks "], "question": "which entity is a warning device", "label": 1}, {"captions": ["a man speaks uses a drill", "a door slams shut and an object moves on a hard surface"], "sample_ids": ["x5eIC7S0fbg", "zkKdxzNC97Y"], "start_seconds": ["60", "27"], "properties": ["A man is speaking, uses a drill, and is a tool", "hard, 
surface, door"], "captions_pred_video": ["a person in surgical gloves is using a needle to remove a small object from a tooth", "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a man is speaking and using a power tool ", "a door is opened and closed"], "question": "which entity is a tool", "label": 0}, {"captions": ["a horse runs while two women talk", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["sdvI1mHAsc", "uYT5gxnyMWM"], "start_seconds": ["20", "50"], "properties": ["two women, horse, run", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["horses clip-clop and a woman speaks", "a woman is speaking and a baby is crying"], "question": "which entity has more people", "label": 1}, {"captions": ["a motorcycle engine is revving while people are speaking", "small dogs yip and bark sharply"], "sample_ids": ["y8dSeubCNI", "v-wcQf4BDY0"], "start_seconds": ["4", "120"], "properties": ["engine revving, people speaking, motorcycle", "bark, yip, sharply"], "captions_pred_video": [null, "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["an engine revving and people talking in the background", "a dog barks and growls"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a woman speaks happily and an animal chirps", "an engine runs loudly"], "sample_ids": ["uWAAAL4CIoc", "vqZuVbG6-HI"], "start_seconds": ["0", "130"], "properties": ["a woman, chirps, animal", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a woman is speaking and a dog is barking ", "a lawn mower is running and men are speaking "], "question": "which entity is quieter", "label": 0}, {"captions": ["wind blows as people chatter quietly", "waves crash against a shoreline and people speak"], "sample_ids": ["xBxDz0CFVn0", 
"yFB25fqfU8I"], "start_seconds": ["30", "300"], "properties": ["wind, chatter, people", "wave, crash, shoreline"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a person surfing in the ocean"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a man is speaking and laughing while water is splashing and gurgling"], "question": "which entity is more calm", "label": 0}, {"captions": ["a man speaks while rain falls onto a hard surface", "an insect buzzes around continuously"], "sample_ids": ["wqN6IIHw3po", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["rain, surface, fall", "buzzes, continuously, insect"], "captions_pred_video": ["in your own words what is happening in this screenshot? blood splattered all over the place", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking and water is splashing", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a machine clanks and thumps and a male speaks", "a man talks nearby and another man talks far away while some liquid flows"], "sample_ids": ["sWZzXuWYY", "sapQIQUhFc"], "start_seconds": ["420", "280"], "properties": ["male, clanks, thumps", "liquid, flow, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a man is speaking and a stream is flowing in the background "], "question": "which entity has a man speaking?", "label": 0}, {"captions": ["speaking following by laughing and clapping", "an animal quacks rapidly"], "sample_ids": ["u2f5NpsoHBg", "vh30P49Po6s"], "start_seconds": ["30", "30"], "properties": ["person, laugh, clap", "animal, quacks, rapidly"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "a 
duck is quacking loudly"], "question": "which entity is a person", "label": 0}, {"captions": ["music plays and animals vocalize as a cartoon character makes sounds", "an infant crying frantically"], "sample_ids": ["weDbePuc-Xc", "zwOBqeFTgiU"], "start_seconds": ["40", "30"], "properties": ["cartoon character, music, vocalize", "cry, infant, frantically"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "of the baby crying in the car seat"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["a infant makes noise and is excited", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["wIJK3-5y0kA", "tiDFTC-5vU"], "start_seconds": ["30", "30"], "properties": ["noise, excited, infant", "male, duck, laugh"], "captions_pred_video": ["of a baby playing with a cat in a dark room", null], "captions_pred_audio": ["a baby cries and a woman speaks", "a man is speaking and ducks are quacking"], "question": "which entity is speaking", "label": 1}, {"captions": ["two women and a man talk while a kid cries", "people applaud and hoot and chat quietly"], "sample_ids": ["wyllXV6PjKo", "wwyfGO2J4"], "start_seconds": ["30", "90"], "properties": ["a kid, talk, cry", "people, applaud, hoot"], "captions_pred_video": [null, null], "captions_pred_audio": ["a woman speaks and a baby cries", "people are clapping and speaking with background noise "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a child babbles as a woman speaks", "a woman speaks as she rubs two objects together"], "sample_ids": ["wEBlkGWVWwE", "vzxHnu-SFEw"], "start_seconds": ["260", "80"], "properties": ["a, babble, woman", "two objects, woman, speak"], "captions_pred_video": ["shows a person writing on the whiteboard", "how to make a paper cup with scissors youtube how to make a paper 
cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a woman is speaking and a child is speaking with background noise and clapping ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which woman speaks as she rubs two objects together?", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["vSeGhaZt-aI", "tDVADusiIoc"], "start_seconds": ["50", "60"], "properties": ["water, bubbles, run", "water, radio, man"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity shows a man speaking over a radio?", "label": 1}, {"captions": ["a baby coos and fidgets as a lady speaks and laughs", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["uPDn2BFTHk", "wDVMhEdTiVw"], "start_seconds": ["140", "30"], "properties": ["lady, laugh, baby", "gun, shoot, water"], "captions_pred_video": 
[null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a baby laughs and a woman speaks", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to cause harm", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["t69a8aRKhmc", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["a, b, c", "a, scream, girl"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "people speak as gunfire rings out"], "sample_ids": ["slZLHwNbbt4", "wqTCwqVRDlk"], "start_seconds": ["300", "80"], "properties": ["a, horn, run", "gunfire, ring, speak"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "a man is speaking and a gun is fired"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a dog whimpers and a woman briefly talks", "vehicles pass by on a roadway"], "sample_ids": ["y1saVTXsKwc", "tgbONvsP47Y"], "start_seconds": ["80", "0"], "properties": ["a, dog, talk", "pass, vehicle, roadway"], "captions_pred_video": ["a dog playing with a pink ball", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a dog barks and a man speaks", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "people applaud and hoot and chat quietly"], "sample_ids": 
["vSeGhaZt-aI", "wwyfGO2J4"], "start_seconds": ["50", "90"], "properties": ["water, bubbles, run", "people, applaud, hoot"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", null], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "people are clapping and speaking with background noise "], "question": "which entity has more people", "label": 1}, {"captions": ["a train whistle keeps going off while the clickety-clack of the train on the rails are continuous", "birds chirp and pigeons vocalize while walking around"], "sample_ids": ["ukg5L09Wpvo", "wIvYjuR3nrg"], "start_seconds": ["150", "9"], "properties": ["clickety-clack, train, whistle", "birds, pigeons, vocalize"], "captions_pred_video": ["footage of a train passing through a forest on a dirt road", "footage of a pigeon sitting on a roof with trees in the background"], "captions_pred_audio": ["a train blows its whistle and blows its horn ", "birds are chirping and cooing"], "question": "which entity is a bird?", "label": 1}, {"captions": ["ticking continues without interruption", "an infant crying as a woman laughs"], "sample_ids": ["v-g-j2uTByM", "xhmRY9yhC7c"], "start_seconds": ["30", "20"], "properties": ["ticking, continuous, clock", "a, laugh, infant"], "captions_pred_video": ["in your own words a cuckoo clock hanging on the wall", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a clock is ticking loudly", "a baby cries and a woman speaks"], "question": "which entity is not continuous", "label": 1}, {"captions": ["a man speaks and is typing on a keyboard", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["x9JovgqUcs", "uYT5gxnyMWM"], "start_seconds": ["500", "50"], "properties": ["a, man, speaks, keyboard", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man speaks and types on a keyboard", "a woman is speaking and 
a baby is crying"], "question": "which entity is a person speaking?", "label": 0}, {"captions": ["a man speaks over a radio as wind blows and water splashes", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["tDVADusiIoc", "w5W5Kqtc8E"], "start_seconds": ["60", "100"], "properties": ["water, radio, man", "wind, blow, vehicle"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", null], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has a vehicle engine running?", "label": 1}, {"captions": ["someone is typing on a computer keyboard", "a helicopter engine runs continuously"], "sample_ids": ["v0x1odnXtP0", "ugHJF0hfYkg"], "start_seconds": ["210", "10"], "properties": ["keyboard, type, computer", "engine, running, continuously"], "captions_pred_video": ["how to make money on youtube in spanish", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a person is typing on a keyboard", "a helicopter is flying overhead "], "question": "which entity is running continuously", "label": 1}, {"captions": ["several ducks quack and cocks crow far away", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sNB8zxXneIM", "zj2R0XoFr5k"], "start_seconds": ["20", "50"], "properties": ["several, quack, cocks", "airplane, boy, fly"], "captions_pred_video": ["a group of geese in a cage", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["birds chirp and a pop occurs before a man speaks", "a propeller rotates loudly and intensely"], "sample_ids": ["zuua6-5goWw", "ugHJF0hfYkg"], "start_seconds": ["30", 
"10"], "properties": ["sound, pop, bird", "loud, intense, propeller"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a helicopter is flying overhead "], "question": "which entity is louder", "label": 1}, {"captions": ["motor noise is followed by a horn honking and a siren wailing", "a propeller rotates loudly and intensely"], "sample_ids": ["y2bVZ7rz-5M", "ugHJF0hfYkg"], "start_seconds": ["280", "10"], "properties": ["motor noise, horn, siren", "loud, intense, propeller"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a helicopter is flying overhead "], "question": "which is louder", "label": 1}, {"captions": ["birds tweet and squawk", "an infant crying as a woman laughs"], "sample_ids": ["w1mlz3Pe4fU", "xhmRY9yhC7c"], "start_seconds": ["300", "20"], "properties": ["squawk, tweet, scream", "a, laugh, infant"], "captions_pred_video": ["of a bird in a cage", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["birds are chirping and singing", "a baby cries and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["a person snores loudly multiple 
times at a close distance", "someone is typing on a computer keyboard"], "sample_ids": ["sSMl2vc3ek", "v0x1odnXtP0"], "start_seconds": ["20", "210"], "properties": ["loud, multiple, distance", "keyboard, type, computer"], "captions_pred_video": [null, "how to make money on youtube in spanish"], "captions_pred_audio": ["a person snoring loudly", "a person is typing on a keyboard"], "question": "which is not a person", "label": 1}, {"captions": ["the wind blows while a vehicle engine runs", "a child speaks in closed space"], "sample_ids": ["xyL9F5VrjkE", "yW6FWLSLkx4"], "start_seconds": ["20", "40"], "properties": ["wind, blows, vehicle", "child, space, speak"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "of the doll sitting on the bed with her hand outstretched towards the viewer"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking with background noise and breathing sounds "], "question": "which entity is speaking", "label": 1}, {"captions": ["some tunes played by whistling", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["u6BnG6YZqJ4", "su6FAOcOA8c"], "start_seconds": ["0", "4"], "properties": ["tune, play, whistling", "engine, idle, woman"], "captions_pred_video": ["a young boy standing in front of a group of kids in a classroom", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a person whistling a song", "a woman is speaking and a subway train is moving "], "question": "which is not a musical instrument", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a machine beeps continuously"], "sample_ids": ["sShpyu2l4YQ", "y682ml90jGw"], "start_seconds": ["0", "11"], "properties": ["growl, bark, yip", "beeps, machine, continuously"], "captions_pred_video": ["the puppies are playing with a toy", null], "captions_pred_audio": ["a dog is barking and growling", "a beeping sound is being made "], "question": 
"which entity is quieter", "label": 1}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "water flows as men speak and yell"], "sample_ids": ["siJFXfGWgDk", "vJ7JPEFhyLA"], "start_seconds": ["50", "16"], "properties": ["a, bird, vehicle", "water, flow, men"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more water flowing", "label": 1}, {"captions": ["goats bleat and people speak", "a person is whistling"], "sample_ids": ["z5iUE5h0EPs", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["goats bleat, people speak, language", "person, whistling, person"], "captions_pred_video": ["of the goat in the barn", null], "captions_pred_audio": ["a goat bleats and a man speaks", "a person whistling a song"], "question": "which entity is a person", "label": 1}, {"captions": ["the rumbling of a bus followed by a soft male voice", "motor noise is followed by a horn honking and a siren wailing"], "sample_ids": ["vK93VuO0yNc", "y2bVZ7rz-5M"], "start_seconds": ["30", "280"], "properties": ["male voice, bus, rumble", "motor noise, horn, siren"], "captions_pred_video": ["footage is blurry due to the movement of the bus as it drives through the city at night", "footage of a parade of fire trucks driving down the street"], "captions_pred_audio": ["a car drives by with wind noise in the background ", "a truck is honking its horn and a siren is blaring "], "question": "which entity is followed by a horn honking and a siren wailing?", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["u--KhUW8l1Y", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["horn, siren, life", "a 
woman, something, fried"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["vehicle engines race around a track as a man commentates", "a woman speaks and laughs and an animal grunts and snorts"], "sample_ids": ["sZPuqDgX2V0", "uEU-Hg5MTN8"], "start_seconds": ["30", "27"], "properties": ["commentator, race, track", "animal, grunts, snorts"], "captions_pred_video": [null, "of a girl wearing a pig mask and a boy hugging her"], "captions_pred_audio": ["a man is speaking and a helicopter is flying overhead ", "a woman is speaking and a baby is crying"], "question": "which entity is a video of a race?", "label": 0}, {"captions": ["sirens ring and approach with humming of distant traffic", "winds blows roughly as a vehicle races past"], "sample_ids": ["xERFUeZONz8", "xjvTpk2Zpr8"], "start_seconds": ["0", "70"], "properties": ["ring, approach, traffic", "wind, blows, vehicle"], "captions_pred_video": ["footage is blurry due to camera shake or motion blur", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["an emergency vehicle siren blares", "a jet engine roars and wind blows "], "question": "which entity is more likely to be heard", "label": 0}, {"captions": ["wind blowing followed by a zoom", "a motorcycle engine works nearby"], "sample_ids": ["vr8ZXjEBhMQ", "tOSWIURC-4"], "start_seconds": ["150", "0"], "properties": ["wind, blow, zoom", "engine, work, nearby"], "captions_pred_video": ["is taken from a motorcycle's point of view", null], "captions_pred_audio": ["wind blows and a chainsaw cuts through wood ", "a lawn mower is running "], "question": "which entity is a zoom of", "label": 0}, {"captions": ["television program is played far away while a woman talks and birds tweet 
nearby", "someone snores nearby"], "sample_ids": ["vbZ-0lGPneg", "spJCm8tD9Zo"], "start_seconds": ["30", "90"], "properties": ["a woman, a television program, a bird", "someone snores, nearby, someone"], "captions_pred_video": ["of a man holding a baby duck in his hands", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "a person is snoring loudly"], "question": "which entity is playing a television program", "label": 0}, {"captions": ["speaking following by laughing and clapping", "water flows and trickles"], "sample_ids": ["u2f5NpsoHBg", "tB7hWb9gTuQ"], "start_seconds": ["30", "30"], "properties": ["person, laugh, clap", "water, flow, trickle"], "captions_pred_video": ["is being projected on a screen at the front of the stage", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["a woman is speaking and a crowd is clapping", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "a man speaks as a motor runs in the background"], "sample_ids": ["x5cuQjOdM3E", "xZepNM9qcRA"], "start_seconds": ["30", "30"], "properties": ["cat, meows, young woman", "background, motor, run"], "captions_pred_video": ["a black background with an airplane flying in the sky", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is quieter", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "an engine runs loudly"], "sample_ids": ["vf44CgrjT0A", "vqZuVbG6-HI"], "start_seconds": ["20", "130"], "properties": ["loud, long, person", "loud, engine, run"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "footage is blurry because it's raining 
outside"], "captions_pred_audio": ["a loud burp", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["sucking and grunting followed by slurping with birds in the background", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["yYEVLuqEytU", "w5W5Kqtc8E"], "start_seconds": ["40", "100"], "properties": ["grunt, slurp, background", "wind, blow, vehicle"], "captions_pred_video": ["a baby goat is being petted by a person's hand", null], "captions_pred_audio": ["several sheep bleat and a man speaks", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks while video game music plays with some clicking", "a woman speaks and dog vocalizes"], "sample_ids": ["tw76HGONaKg", "uWAAAL4CIoc"], "start_seconds": ["570", "0"], "properties": ["music, click, man", "a, dog, vocalize"], "captions_pred_video": ["game you are currently playing on microsoft xbox 360 the legend of zelda ocarina of time the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda ocarina of time secrets, the legend of zelda ocarina of time wiki, the legend of zelda ocarina of time walkthrough, the legend of zelda ocarina of time guide, the legend of zelda ocarina of time tips, the legend of zelda ocarina of time cheats, the legend of zelda", null], "captions_pred_audio": ["a man speaks and types on a computer keyboard ", "a woman is speaking and a dog is barking "], "question": "which entity is a dog?", "label": 1}, {"captions": 
["multiple insects buzz over rustling wind", "a person is burping then speaks and laughs"], "sample_ids": ["tMJne1a4AFI", "wAAkbZToh8"], "start_seconds": ["0", "0"], "properties": ["wind, buzz, rustling", "burp, laugh, speak"], "captions_pred_video": ["a swarm of bees on the ground", null], "captions_pred_audio": ["a swarm of bees buzzing around", "a man burps and a woman speaks"], "question": "which entity is not a person?", "label": 0}, {"captions": ["a man sprays as a scraping occurs in the background", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sOa7g-44Dag", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["background, man, spray", "airplane, boy, fly"], "captions_pred_video": ["footage and stock-footage/b-roll of a man in a hazmat suit spraying insulation in an attic", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and rubbing his hands together ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is flying", "label": 1}, {"captions": ["a fly buzzes around loudly as birds chirp", "people speak in a closed space"], "sample_ids": ["uJV8NDaHqqk", "sTpirNYo8vQ"], "start_seconds": ["100", "30"], "properties": ["loud, fly, chirp", "people, space, speak"], "captions_pred_video": ["a bee hive in a wooden box", "of a man taking a selfie on a bus"], "captions_pred_audio": ["a swarm of bees buzzing around", "a man is speaking while a car is revving and accelerating "], "question": "which entity is quieter", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "food is frying then a woman speaks"], "sample_ids": ["siJFXfGWgDk", "ukxt9I7eMMg"], "start_seconds": ["50", "30"], "properties": ["man, woman, vehicle", "food, woman, speak"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage of a person preparing food on a 
stool in a kitchen"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a woman is speaking while food is frying in the background "], "question": "which entity has a woman speaking?", "label": 0}, {"captions": ["a toilet flushes and water drains", "paper is crumpling consistently"], "sample_ids": ["sfAvvZwdLCY", "v5cSxLaHADY"], "start_seconds": ["20", "0"], "properties": ["water drains, flushes, water", "paper is crumpling, paper is white, paper is crumpling"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of the person holding a pair of scissors"], "captions_pred_audio": ["a toilet is flushed", "paper is crumpled and crinkled"], "question": "which entity is crumpling", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["vSeGhaZt-aI", "yDoT73BWsdA"], "start_seconds": ["50", "10"], "properties": ["water, bubbles, run", "engine, revs, vehicle"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle", "label": 1}, {"captions": ["several insects fly while two men talk", "people applaud and hoot and chat quietly"], "sample_ids": ["s-T9OVOiMLo", "wwyfGO2J4"], "start_seconds": ["330", "90"], "properties": ["several, fly, men", "people, applaud, hoot"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", null], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "people are clapping and speaking with background noise "], "question": "which entity is more active", "label": 1}, {"captions": ["water splashing and a person laughs in the distance then a man speaks nearby", "a drill 
drills through something then people begin laughing"], "sample_ids": ["vddP56-ogds", "tEE3MpBt1sg"], "start_seconds": ["30", "50"], "properties": ["water, splash, person, laugh", "drill, something, laugh"], "captions_pred_video": [null, "footage is blurry due to the smoke in the air"], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "people are laughing breathing and speaking with background noise "], "question": "which entity is about a drill?", "label": 1}, {"captions": ["people speak in the background as a clock ticktocks", "water splashes and a door squeaks"], "sample_ids": ["vZAw4apG0Es", "sdXV-ylviw"], "start_seconds": ["30", "190"], "properties": ["background, clock, ticktocks", "sound, splash, door"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", null], "captions_pred_audio": ["a clock is ticking and people are talking", "a dog barks and taps with background noise "], "question": "which entity has a clock in the background?", "label": 0}, {"captions": ["a young female speaks, followed by spraying and a female screaming", "a man speaks as a motor runs in the background"], "sample_ids": ["uYT5gxnyMWM", "xZepNM9qcRA"], "start_seconds": ["50", "30"], "properties": ["female, spraying, scream", "background, motor, run"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a person burps loudly for a long time nearby", "a heavy rain falls endlessly"], "sample_ids": ["vf44CgrjT0A", "wP8ZKrlx3oA"], "start_seconds": ["20", "40"], "properties": ["loud, long, person", "heavy, rain, fall"], "captions_pred_video": ["a man in a striped shirt with his mouth open in front of a bookshelf", "footage 
of a flooded street in the middle of a desert with mountains in the background"], "captions_pred_audio": ["a loud burp", "a heavy rain is falling on a surface"], "question": "which entity is falling", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a duck quacks continuously"], "sample_ids": ["yajyRTUQk3U", "vh30P49Po6s"], "start_seconds": ["400", "30"], "properties": ["noise, woman, speak", "quacks, continuously, duck"], "captions_pred_video": ["- a woman cooking in the kitchen", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["a man speaks as bees buzz and birds chirp", "a baby cries and fusses, a woman speaks, and a man speaks"], "sample_ids": ["t25U-v4k4ts", "wyllXV6PjKo"], "start_seconds": ["40", "30"], "properties": ["bees buzz, birds chirp, man speaks", "a baby, a woman, a man"], "captions_pred_video": ["of a beekeeper working on a beehive in the woods", null], "captions_pred_audio": ["a man is speaking and bees are buzzing", "a woman speaks and a baby cries"], "question": "which entity has a baby?", "label": 1}, {"captions": ["a man speaks followed by another man speaking outside", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["viuTg1M-dqg", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["two men, speak, follow", "a, scream, girl"], "captions_pred_video": ["footage of water coming out of a hole in the ground", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a woman is speaking and a baby is crying"], "question": "which entity has a scream followed by more girls talking?", "label": 1}, {"captions": ["a baby cries and a woman moans", "a man speaks as a motor runs in the background"], "sample_ids": 
["smDKStoHBJo", "xZepNM9qcRA"], "start_seconds": ["0", "30"], "properties": ["a, cry, woman", "background, motor, run"], "captions_pred_video": ["a man holding a crying baby in his arms", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a man speaks as a vehicles passes by then a woman speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["siJFXfGWgDk", "zj2R0XoFr5k"], "start_seconds": ["50", "50"], "properties": ["man, woman, vehicle", "airplane, boy, fly"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a woman speaks while a helicopter flies overhead "], "question": "which entity has a boy speaking?", "label": 1}, {"captions": ["a man speaks, another man speaks, and a small bell dings", "water splashes as an animal walks through"], "sample_ids": ["t69a8aRKhmc", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["a, b, c", "animal, water, splashes"], "captions_pred_video": ["footage is blurry and out of focus", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a train horn sounds as a railroad passing bell rings", "a woman speaks as she rubs two objects together"], "sample_ids": ["zgUgkpk78xU", "vzxHnu-SFEw"], "start_seconds": ["70", "80"], "properties": ["horn, bell, train", "two objects, woman, speak"], "captions_pred_video": ["of a train 
passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["distant men speak as a spray can nozzle is depressed", "multiple adults speaking, and a child shouting in the background"], "sample_ids": ["rwtmaKiCcQU", "yks4cLgIDMc"], "start_seconds": ["30", "170"], "properties": ["nozzle, depressed, spray can", "background, speaking, child"], "captions_pred_video": ["shows a man spraying paint on a wall with a spray gun", "footage of two kids wrestling on the floor"], "captions_pred_audio": ["spraying and people speaking", "a man is speaking and a child is crying"], "question": "which entity has a child shouting in the background?", "label": 
1}, {"captions": ["metal clacking as food and oil sizzles followed by a woman talking", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["vW4x7S1VfQc", "xfaoyyzw2WU"], "start_seconds": ["150", "180"], "properties": ["clacking, oil, woman", "loud, jet engine, roar"], "captions_pred_video": ["footage of a person cooking fish in a frying pan on a stove top", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["food sizzles in a frying pan", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a clock ticks quietly and rhythmically", "an infant crying frantically"], "sample_ids": ["u7C-AEBQM", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["ticks, rhythmic, quiet", "cry, infant, frantically"], "captions_pred_video": [null, "of the baby crying in the car seat"], "captions_pred_audio": ["a ticktock of a clock", "a baby cries loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["a horn honks twice and keys jingle, followed by an electronic beep", "a man speaks as a motor runs in the background"], "sample_ids": ["wSVhSdj0F0", "xZepNM9qcRA"], "start_seconds": ["10", "30"], "properties": ["horn honks, keys jingle, electronic beep", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a car horn honks and keys jangle with background noise ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a motorcycle idles loudly as wind blows", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["v7jJS8aAyA", "tdWhHV3X25Q"], "start_seconds": ["10", "60"], "properties": ["wind, blows, loudly", "applause, audience, yells"], "captions_pred_video": [null, "a man is talking to another man on a stage in front of a microphone"], 
"captions_pred_audio": ["a motorcycle engine is idling and vibrating", "a man is speaking and a crowd is clapping"], "question": "which entity is a performance", "label": 1}, {"captions": ["a jet engine spools up and takes off", "water is sprayed across a hard surface"], "sample_ids": ["vBslzh7saPw", "sQwlkXjQabo"], "start_seconds": ["90", "10"], "properties": ["engine, spools, takes", "water, spray, surface"], "captions_pred_video": ["a pickup truck carrying a large object down the road", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a jet engine roars and accelerates ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a bell rings multiple times before a siren sounds in the distance", "people cheer as a vehicle engine revs"], "sample_ids": ["ul60S8TXDA8", "xjhAnI2q6hM"], "start_seconds": ["60", "6"], "properties": ["sound, distance, bell", "engine revs, vehicle, people"], "captions_pred_video": ["game the legend of zelda ocarina of time on nintendo 64", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a bell rings and mechanisms make ticking sounds ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["frogs croak and vocalize", "birds chirp and objects are moved around"], "sample_ids": ["yswmmRZFItk", "yPUYU6t3rwo"], "start_seconds": ["0", "370"], "properties": ["croak, vocalize, frog", "birds chirp, objects are moved around, birds"], "captions_pred_video": ["a close up of a frog in the water", "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a frog is croaking", "insects buzz and a man speaks"], "question": "which entity is a bird", "label": 1}, {"captions": ["an animal bleats and cries out and metal bangs", "a train whistle keeps going off while the clickety-clack of the train on the rails are 
continuous"], "sample_ids": ["xfudFO976zE", "ukg5L09Wpvo"], "start_seconds": ["0", "150"], "properties": ["animal, bleats, cry", "clickety-clack, train, whistle"], "captions_pred_video": ["footage is blurry and shaky", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a goat bleats and birds chirp in the background ", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["vehicle tires screech and a man speaks before a car door opens", "a man speaks as a car is passing by"], "sample_ids": ["sxYkFKFIZD0", "sK4u5T8hW78"], "start_seconds": ["20", "30"], "properties": ["screech, man, door", "a, car, pass"], "captions_pred_video": ["2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2005 audi a4 1 8t 2", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating with a squeal in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is about a car passing by?", "label": 1}, {"captions": ["a car revs and accelerates loudly and men and women chatter among themselves", "a man speaks as a motor runs in the background"], "sample_ids": ["y8dSeubCNI", "xZepNM9qcRA"], "start_seconds": ["4", "30"], "properties": ["men, 
women, car", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["an engine revving and people talking in the background", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["a person whistles a meandering tune", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["uFoga8sHpiw", "uYT5gxnyMWM"], "start_seconds": ["90", "50"], "properties": ["person, tune, whistle", "female, spraying, scream"], "captions_pred_video": ["footage of a bird in a cage", "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a person whistles a song", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 0}, {"captions": ["a man laughs and speaks as cats purr and hiss", "people speak as gunfire rings out"], "sample_ids": ["vVhthZ45k3Y", "wqTCwqVRDlk"], "start_seconds": ["30", "80"], "properties": ["cat, purr, hiss", "gunfire, ring, speak"], "captions_pred_video": ["footage is blurry and out of focus", "of a woman shooting a gun at a target on the beach"], "captions_pred_audio": ["a man is speaking and a cat is meowing", "a man is speaking and a gun is fired"], "question": "which entity is more quiet", "label": 0}, {"captions": ["rain falls on a surface as men speak and music plays", "a girl speaks followed by a scream and more girls talking"], "sample_ids": ["w0xsN8X18Y", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["music, surface, rain", "a, scream, girl"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a woman is speaking and a baby is crying"], "question": "which entity has more people speaking", "label": 1}, {"captions": ["birds chirp, a woman speaks, 
and insects buzz", "two men and a woman talk while wind blows and birds tweet"], "sample_ids": ["t97k0cejSQE", "wqZ135Ssz0"], "start_seconds": ["250", "60"], "properties": ["sound, chirp, buzz", "two men, woman, birds"], "captions_pred_video": ["a bee on a purple thistle flower", null], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a man is speaking and ducks are quacking with wind noise in the background "], "question": "which entity has more birds", "label": 1}, {"captions": ["a toilet flushes and water drains", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["sfAvvZwdLCY", "wDVMhEdTiVw"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "gun, shoot, water"], "captions_pred_video": ["footage of the toilet in the bathroom", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a toilet is flushed", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is a source of water", "label": 1}, {"captions": ["music plays, a person speaks, followed by whooshes and a ding", "small dogs growl, bark and yip."], "sample_ids": ["tQWGZLItBXk", "sShpyu2l4YQ"], "start_seconds": ["170", "0"], "properties": ["music, person, ding", "growl, bark, yip"], "captions_pred_video": ["worms revolution screenshots", "the puppies are playing with a toy"], "captions_pred_audio": ["a child speaks music plays video game sounds sound effects and sound effects play ", "a dog is barking and growling"], "question": "which entity is more active", "label": 1}, {"captions": ["children speak and play together", "a man talks while a clock does ticktock"], "sample_ids": ["yVVP8XvWJTo", "spYNpeN7rPY"], "start_seconds": ["260", "1"], "properties": ["children, speak, play", "a clock, ticktock, man"], "captions_pred_video": ["footage of a playground at a school or daycare center", "in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a man is speaking and breathing with background noise "], "question": "which entity is a clock?", "label": 1}, {"captions": ["an infant crying frantically", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["zwOBqeFTgiU", "vbZ-0lGPneg"], "start_seconds": ["30", "30"], "properties": ["cry, infant, frantically", "a woman, a television program, a bird"], "captions_pred_video": ["of the baby crying in the car seat", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a baby cries loudly", "a woman is speaking and a dog is whimpering"], "question": "which entity is a person", "label": 1}, {"captions": ["race cars go around a track as a man commentates", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["uZesmtKZGSw", "wDVMhEdTiVw"], "start_seconds": ["250", "30"], "properties": ["car, track, man", "gun, shoot, water"], "captions_pred_video": ["1970 chevrolet holden monaro 
gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a man is speaking and a car is revving with laughter in the background ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["waves crash against a shoreline and wind blows", "an airplane engine spools and people speak"], "sample_ids": ["zdYdyF9-m8U", "wTjoRj1se3U"], "start_seconds": ["7", "390"], "properties": ["wind, crash, shoreline", "airplane, engine, spool"], "captions_pred_video": ["a person kayaking in the ocean near a cliff", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["waves crash and wind blows ", "a jet engine is running and people are talking"], "question": "which entity is stationary", "label": 1}, {"captions": ["birds tweet and squawk", "wind blows as people chatter quietly"], "sample_ids": ["w1mlz3Pe4fU", "xBxDz0CFVn0"], "start_seconds": ["300", "30"], "properties": ["squawk, tweet, scream", "wind, chatter, people"], "captions_pred_video": ["of a bird in a cage", "footage is blurry and out of focus"], "captions_pred_audio": ["birds are chirping and singing", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a group of people chatter and talk as multiple horns honk in the background", "someone is typing on a computer keyboard"], "sample_ids": 
["yLy-WycbVVE", "v0x1odnXtP0"], "start_seconds": ["30", "210"], "properties": ["background, people, talk", "keyboard, type, computer"], "captions_pred_video": ["a soccer field in a stadium with yellow and red seats", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking and a church bell is ringing with wind noise in the background ", "a person is typing on a keyboard"], "question": "which entity is typing on a keyboard", "label": 1}, {"captions": ["a man speaks while turning a water faucet on", "a propeller rotates loudly and intensely"], "sample_ids": ["vf9xf3vMsGM", "ugHJF0hfYkg"], "start_seconds": ["540", "10"], "properties": ["A man speaks while turning a water faucet on.", "loud, intense, propeller"], "captions_pred_video": ["of the person washing their hands under the faucet", "a man in a helicopter cockpit wearing headphones"], "captions_pred_audio": ["a man is speaking while water is running in the background", "a helicopter is flying overhead "], "question": "which entity is quieter", "label": 0}, {"captions": ["a toilet flushes and water drains", "a vehicle engine accelerating then running on idle"], "sample_ids": ["sfAvvZwdLCY", "vYkA3cfXp5Q"], "start_seconds": ["20", "30"], "properties": ["water drains, flushes, water", "engine, accelerate, idle"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a toilet is flushed", "an engine is idling"], "question": "which entity is a machine", "label": 1}, {"captions": ["water splashes as an animal walks through", "wind blows as people chatter quietly"], "sample_ids": ["w1ir-sZ3Im8", "xBxDz0CFVn0"], "start_seconds": ["90", "30"], "properties": ["animal, water, splashes", "wind, chatter, people"], "captions_pred_video": ["footage of a group of people riding horses through a river", "footage is blurry and out of focus"], "captions_pred_audio": ["water splashes and gurgles as people 
speak", "a man is speaking with wind noise in the background "], "question": "which entity is more quiet", "label": 1}, {"captions": ["a machine beeps continuously", "an engine runs loudly"], "sample_ids": ["y682ml90jGw", "vqZuVbG6-HI"], "start_seconds": ["11", "130"], "properties": ["beeps, machine, continuously", "loud, engine, run"], "captions_pred_video": [null, "footage is blurry because it's raining outside"], "captions_pred_audio": ["a beeping sound is being made ", "a lawn mower is running and men are speaking "], "question": "which entity is louder", "label": 1}, {"captions": ["a man is filing a hard object", "an airplane engine runs"], "sample_ids": ["vveS8HT7Uog", "yVPZ2MNWpms"], "start_seconds": ["100", "0"], "properties": ["a man, hard, object", "engine, airplane, runs"], "captions_pred_video": ["footage is of a workbench with various tools on it including a hammer and a screwdriver", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a man is filing and speaking with background noise and breathing ", "a car is driving by on the road "], "question": "which object is moving", "label": 1}, {"captions": ["a woman speaks and taps on a hard surface before running tap water", "some tunes played by whistling"], "sample_ids": ["wvKpEYswXO0", "u6BnG6YZqJ4"], "start_seconds": ["150", "0"], "properties": ["water, tap, run", "tune, play, whistling"], "captions_pred_video": ["of the person preparing food in the kitchen", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a woman is speaking and tapping with background noise and water running ", "a person whistling a song"], "question": "which entity is not a musical instrument?", "label": 0}, {"captions": ["a woman talks while a baby cries and a man whispers", "a stream of water runs briefly"], "sample_ids": ["smDKStoHBJo", "x-PeY8Yb8M4"], "start_seconds": ["0", "300"], "properties": ["a, talk, baby, cry", "stream, water, run"], 
"captions_pred_video": ["a man holding a crying baby in his arms", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a baby is crying and a woman is speaking", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a heavy rain falls endlessly", "continuous sneezing together with speech"], "sample_ids": ["wP8ZKrlx3oA", "x4dZyf9Gbj0"], "start_seconds": ["40", "130"], "properties": ["heavy, rain, fall", "continuous, sneeze, speech"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "footage is blurry and out of focus"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a woman sneezes and speaks"], "question": "which entity is more likely to be heard", "label": 1}, {"captions": ["a duck quacks several times", "a clock alarm sounds and gears turn"], "sample_ids": ["vh30P49Po6s", "w2M4i1mklOA"], "start_seconds": ["30", "30"], "properties": ["quacks, duck, several", "alarm, gears, turn"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "footage of an antique clock"], "captions_pred_audio": ["a duck is quacking loudly", "a clock is ticking and a bell is ringing "], "question": "which entity is a clock?", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "vehicles pass by on a roadway"], "sample_ids": ["vb1fPSDI4c", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["multiple, people, yell", "pass, vehicle, roadway"], "captions_pred_video": [null, "footage of a fire truck entering a garage"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a car is driving on the road "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "three men talk while wind blows and some liquid flows"], "sample_ids": ["zALy31PjDl0", 
"vJ7JPEFhyLA"], "start_seconds": ["21", "16"], "properties": ["a man, a vehicle, a horn", "three men, wind, flow"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity has more people", "label": 1}, {"captions": ["a railroad crossing bell rings as a train horn blows", "some men converse over an engine running"], "sample_ids": ["tZGN5a7ybxo", "sCiy7QS1U"], "start_seconds": ["60", "300"], "properties": ["ring, train, horn", "men, converse, engine"], "captions_pred_video": ["is taken from a moving vehicle on the train tracks", null], "captions_pred_audio": ["a train is moving and blowing its horn ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a train?", "label": 0}, {"captions": ["two women and a man talk while a kid cries", "water splashes as an animal walks through"], "sample_ids": ["wyllXV6PjKo", "w1ir-sZ3Im8"], "start_seconds": ["30", "90"], "properties": ["a kid, talk, cry", "animal, water, splashes"], "captions_pred_video": [null, "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman speaks and a baby cries", "water splashes and gurgles as people speak"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks as horns blow", "water splashes as an animal walks through"], "sample_ids": ["tHyNqRyK34A", "w1ir-sZ3Im8"], "start_seconds": ["24", "90"], "properties": ["a, man, speaks", "animal, water, splashes"], "captions_pred_video": ["being taken from inside a vehicle on the street at night", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a man is speaking and a car is honking with background noise ", "water splashes and gurgles as people speak"], "question": 
"which entity is more active", "label": 1}, {"captions": ["a man speaking with light rustling", "some men converse over an engine running"], "sample_ids": ["zOZleIRqZm4", "sCiy7QS1U"], "start_seconds": ["80", "300"], "properties": ["light, rustling, man", "men, converse, engine"], "captions_pred_video": ["a person picking berries from the bushes in the garden", null], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a man speaking with light rustling?", "label": 0}, {"captions": ["a man talks followed by a woman shouting", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["s3cTDAj31g", "su6FAOcOA8c"], "start_seconds": ["80", "4"], "properties": ["man, talk, woman", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a man is speaking and a baby is crying", "a woman is speaking and a subway train is moving "], "question": "which entity has a woman speaking?", "label": 1}, {"captions": ["a horn blasts as warning bells ring", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["zgUgkpk78xU", "zj2R0XoFr5k"], "start_seconds": ["70", "50"], "properties": ["horn, bells, ring", "airplane, boy, fly"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a woman speaks while a helicopter flies overhead "], "question": "which entity 
is flying", "label": 1}, {"captions": ["multiple people speak and children yell while water gurgles", "wind blows as people chatter quietly"], "sample_ids": ["vb1fPSDI4c", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["multiple, people, yell", "wind, chatter, people"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a crowd of people are talking and laughing", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["sa6TLVbooCc", "tdWhHV3X25Q"], "start_seconds": ["240", "60"], "properties": ["people, laugh, child", "applause, audience, yells"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a grown man speaks and water bubbles and runs", "several insects fly while two men talk"], "sample_ids": ["vSeGhaZt-aI", "s-T9OVOiMLo"], "start_seconds": ["50", "330"], "properties": ["water, bubbles, run", "several, fly, men"], "captions_pred_video": ["a man in a kitchen preparing a smoothie with a blender", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking and pouring liquid with background noise ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more moving parts", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "a machine beeps continuously"], "sample_ids": ["xyx6eNVEYRY", "y682ml90jGw"], "start_seconds": ["380", "11"], "properties": ["loud, engine, muffles", "beeps, machine, continuously"], 
"captions_pred_video": ["footage of a helicopter landing on a runway at an airport", null], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a beeping sound is being made "], "question": "which entity is quieter", "label": 1}, {"captions": ["a rumble grows louder", "wind blows as people chatter quietly"], "sample_ids": ["y4MY9mp8-TA", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["loudness, increase, rumble", "wind, chatter, people"], "captions_pred_video": ["a helicopter flying in the sky", "footage is blurry and out of focus"], "captions_pred_audio": ["a helicopter flies overhead ", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a vehicle is skidding and squealing tires", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["soTOh3zYJfY", "zFjIWfSD-4"], "start_seconds": ["40", "410"], "properties": ["vehicle, skid, tires", "People, motor, brakes"], "captions_pred_video": ["a red car drifting on a winding road with smoke coming out of it", null], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["children cheer as a man speaks then an audience screams", "an infant crying as a woman laughs"], "sample_ids": ["vJvryTwuAV8", "xhmRY9yhC7c"], "start_seconds": ["16", "20"], "properties": ["audience, cheer, man", "a, laugh, infant"], "captions_pred_video": ["a crowd of people are gathered in a mall, watching a man take a selfie in front of the crowd", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking and a crowd is shouting and whooping ", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["rain falls on a surface as men speak and music plays", "some men converse over an engine running"], 
"sample_ids": ["w0xsN8X18Y", "sCiy7QS1U"], "start_seconds": ["30", "300"], "properties": ["music, surface, rain", "men, converse, engine"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking while a motorboat is moving in the background ", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is about a man speaking and music playing?", "label": 0}, {"captions": ["someone whistles a song", "pigeons vocalize and birds chirp"], "sample_ids": ["sIXTftIuUgw", "uiS58TNyUiw"], "start_seconds": ["90", "430"], "properties": ["someone, song, whistle", "vocalize, bird, chirp"], "captions_pred_video": [null, "of the pigeon in the cage"], "captions_pred_audio": ["a person whistling a song", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a loud engine muffles a man as he speaks", "a vehicle engine revs as the vehicle passes"], "sample_ids": ["xyx6eNVEYRY", "yDoT73BWsdA"], "start_seconds": ["380", "10"], "properties": ["loud, engine, muffles", "engine, revs, vehicle"], "captions_pred_video": ["footage of a helicopter landing on a runway at an airport", "a man driving a race car with a helmet on the steering wheel"], "captions_pred_audio": ["an aircraft engine is running and a man is speaking ", "a race car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a person sneezes followed by another person speaking", "someone snores nearby"], "sample_ids": ["t8CV69hcvF0", "spJCm8tD9Zo"], "start_seconds": ["210", "90"], "properties": ["person, sneeze, follow", "someone snores, nearby, someone"], "captions_pred_video": ["of an airplane flying in the dark sky at night", "of a man laying on the ground with his mouth open"], "captions_pred_audio": ["a woman sneezes and speaks", "a person is snoring loudly"], "question": "which entity is a person", "label": 0}, {"captions": ["children speak and play together", "an 
insect buzzes around continuously"], "sample_ids": ["yVVP8XvWJTo", "v25l1jef3JY"], "start_seconds": ["260", "0"], "properties": ["children, speak, play", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a playground at a school or daycare center", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["children are speaking and breathing with background noise ", "a fly is buzzing around a microphone "], "question": "which entity is not a person", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "pigeons vocalize and birds chirp"], "sample_ids": ["sofxkNWaP0s", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["wind, engine, louder", "vocalize, bird, chirp"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign in the foreground", "of the pigeon in the cage"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a toilet flushes and water drains", "gunshots ring out, a man yells, and more shots follow"], "sample_ids": ["sfAvvZwdLCY", "vKrYfzleLB8"], "start_seconds": ["20", "110"], "properties": ["water drains, flushes, water", "a, ring, gunshots"], "captions_pred_video": ["footage of the toilet in the bathroom", "stock footage of a person holding a gun in their hand"], "captions_pred_audio": ["a toilet is flushed", "a man is speaking with background noise and a cap gun is fired "], "question": "which entity is more likely to be in a movie", "label": 1}, {"captions": ["a young woman speaks and laughs and an animal snorts", "a man speaks as birds chirp and a vehicle passes nearby"], "sample_ids": ["uEU-Hg5MTN8", "siJFXfGWgDk"], "start_seconds": ["27", "50"], "properties": ["a woman, laughs, animal", "a, bird, vehicle"], "captions_pred_video": ["of a girl wearing a pig mask 
and a boy hugging her", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "a man is speaking and birds are chirping in the background "], "question": "which entity has a vehicle passing nearby?", "label": 1}, {"captions": ["cutting machine running then powering down followed by a series of metal clanking in the background", "a stream of water runs briefly"], "sample_ids": ["zofjfKhqLk8", "x-PeY8Yb8M4"], "start_seconds": ["10", "300"], "properties": ["background, metal, clank", "stream, water, run"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a car is driving on a wet road "], "question": "which entity is a stream of water?", "label": 1}, {"captions": ["a diesel truck engine runs continuously", "a duck quacks continuously"], "sample_ids": ["sZvwOuuPGP0", "vh30P49Po6s"], "start_seconds": ["50", "30"], "properties": ["engine, diesel, truck", "quacks, continuously, duck"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a medium engine is running ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["an engine idles consistently before sputtering some", "water pouring and bubbling"], "sample_ids": ["rwTERCUno", "uyRfq-jKPpo"], "start_seconds": ["90", "50"], "properties": ["engine, idle, sputter", "water, bubbles, pouring"], "captions_pred_video": [null, "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle 
youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["an engine is idling and vibrating", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a man speaks as a car is passing by", "a bird chirps in response to a woman chirping for the birds"], "sample_ids": ["sK4u5T8hW78", "uOpoD0gGXcs"], "start_seconds": ["30", "120"], "properties": ["a, car, pass", "chirps, woman, bird"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a herd of cows grazing in the field"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "birds are chirping and a man is speaking"], "question": "which entity is a response to a chirps", "label": 1}, {"captions": ["an engine revs and a turning noise is made", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["tOSWIURC-4", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["noise, engine, revs", "airplane, boy, fly"], "captions_pred_video": [null, "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a lawn mower is running ", "a woman speaks 
while a helicopter flies overhead "], "question": "which entity is a moving object", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "vehicles pass by on a roadway"], "sample_ids": ["sfAvvZwdLCY", "tgbONvsP47Y"], "start_seconds": ["20", "0"], "properties": ["flushes, drains, water", "pass, vehicle, roadway"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a toilet is flushed", "a car is driving on the road "], "question": "which entity is moving", "label": 1}, {"captions": ["music plays and repeated slaps accompany human sniveling, then insect buzz", "a man speaks followed by another man speaking outside"], "sample_ids": ["weDbePuc-Xc", "viuTg1M-dqg"], "start_seconds": ["40", "30"], "properties": ["music, slaps, human", "two men, speak, follow"], "captions_pred_video": ["a cartoon frog and a butterfly are sitting on the ground next to each other", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a man is speaking and birds are chirping with a frog croaking in the background ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has two men speaking?", "label": 1}, {"captions": ["animals bleat and cry out and then a woman speaks", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["yZp6xizR0yU", "zj2R0XoFr5k"], "start_seconds": ["30", "50"], "properties": ["animal, bleat, cry", "airplane, boy, fly"], "captions_pred_video": ["footage of a woman feeding goats in a barn", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a woman is speaking and a goat is bleating", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video of a plane flying by?", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is 
released", "a door slams shut and an object moves on a hard surface"], "sample_ids": ["vuUVPzd2FXw", "zkKdxzNC97Y"], "start_seconds": ["160", "27"], "properties": ["a, steam, release", "hard, surface, door"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "footage of the door opening and closing in slow motion"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a door is opened and closed"], "question": "which entity is a door?", "label": 1}, {"captions": ["people speak and laugh as a child speaks", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["sa6TLVbooCc", "xfaoyyzw2WU"], "start_seconds": ["240", "180"], "properties": ["people, laugh, child", "loud, jet engine, roar"], "captions_pred_video": ["a video of a little boy trying to get a toy from a shelf in a store", "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a woman is speaking and a child is coughing", "an aircraft engine roars and a man speaks "], "question": "which is louder", "label": 1}, {"captions": ["a duck quacks several times", "an audience gives applause"], "sample_ids": ["vh30P49Po6s", "x6iCUDmRpKQ"], "start_seconds": ["30", "38"], "properties": ["quacks, duck, several", "applause, audience, give"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a black background with the moon and stars in the sky"], "captions_pred_audio": ["a duck is quacking loudly", "a group of people are clapping and cheering"], "question": "which is not a verb", "label": 0}, {"captions": ["water runs into a sink while men speak", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["vzceMbklWc", "vbZ-0lGPneg"], "start_seconds": ["180", "30"], "properties": ["water, sink, run", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["water is running and 
a man is speaking", "a woman is speaking and a dog is whimpering"], "question": "which entity has more moving parts", "label": 1}, {"captions": ["a child speaks in closed space", "a stream of water runs briefly"], "sample_ids": ["yW6FWLSLkx4", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["child, space, speak", "stream, water, run"], "captions_pred_video": ["of the doll sitting on the bed with her hand outstretched towards the viewer", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a woman is speaking with background noise and breathing sounds ", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["water pouring and bubbling", "pigeons vocalize and birds chirp"], "sample_ids": ["uyRfq-jKPpo", "uiS58TNyUiw"], "start_seconds": ["50", "430"], "properties": ["water, bubbles, pouring", "vocalize, bird, chirp"], "captions_pred_video": ["on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu", "of the pigeon in the cage"], "captions_pred_audio": ["water is running from a faucet", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["a person screams glaringly", "birds chirp and objects are moved around"], "sample_ids": ["xC8kbrKJmco", "yPUYU6t3rwo"], 
"start_seconds": ["0", "370"], "properties": ["glaringly, screams, person", "birds chirp, objects are moved around, birds"], "captions_pred_video": [null, "footage and stock-footage/b-roll of a beekeeper opening a beehive"], "captions_pred_audio": ["a goat is bleating ", "insects buzz and a man speaks"], "question": "which entity is more active", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "people cheer as a vehicle engine revs"], "sample_ids": ["sTpirNYo8vQ", "xjhAnI2q6hM"], "start_seconds": ["30", "6"], "properties": ["a, tone, fast", "engine revs, vehicle, people"], "captions_pred_video": ["of a man taking a selfie on a bus", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle?", "label": 1}, {"captions": ["a horn blasts loudly as a train passes", "water is sprayed across a hard surface"], "sample_ids": ["zsLxS-uLJTw", "sQwlkXjQabo"], "start_seconds": ["20", "10"], "properties": ["horn, blast, train", "water, spray, surface"], "captions_pred_video": ["footage of the train on the tracks at sunrise or sunset", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a train blows its horn and moves on the tracks ", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a duck quacks continuously", "a car speeding up in the distance"], "sample_ids": ["vh30P49Po6s", "u0TrcHhkPQ"], "start_seconds": ["30", "20"], "properties": ["quacks, continuously, duck", "distance, car, speed"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", null], "captions_pred_audio": ["a duck is quacking loudly", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a goat bleats and 
someone makes a calling noise", "an insect buzzes around continuously"], "sample_ids": ["vlS6YMeWAPo", "v25l1jef3JY"], "start_seconds": ["40", "0"], "properties": ["noise, bleat, call", "buzzes, continuously, insect"], "captions_pred_video": ["footage of a goat in a pen behind a wooden fence", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a goat bleats and birds chirp", "a fly is buzzing around a microphone "], "question": "which entity is not a noise", "label": 1}, {"captions": ["a cat meows and children speak", "an infant crying frantically"], "sample_ids": ["x5cuQjOdM3E", "zwOBqeFTgiU"], "start_seconds": ["30", "30"], "properties": ["cat, speak, children", "cry, infant, frantically"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of the baby crying in the car seat"], "captions_pred_audio": ["a cat meows and a woman speaks", "a baby cries loudly"], "question": "which entity is crying", "label": 1}, {"captions": ["someone whistles briefly", "a man speaks as a motor runs in the background"], "sample_ids": ["uFoga8sHpiw", "xZepNM9qcRA"], "start_seconds": ["90", "30"], "properties": ["sound, duration, pitch", "background, motor, run"], "captions_pred_video": ["footage of a bird in a cage", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a person whistles a song", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity has a longer duration", "label": 1}, {"captions": ["several beeps are followed by a hit and a woman talking", "water pouring and bubbling"], "sample_ids": ["w34HjHr6gAY", "uyRfq-jKPpo"], "start_seconds": ["30", "50"], "properties": ["beeps, hit, woman", "water, bubbles, pouring"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the 
wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "water is running from a faucet"], "question": "which entity is a video of a liquid flowing and bubbling?", "label": 1}, {"captions": ["an emergency vehicle engine runs then a horn blows and siren sounds", "some men talk among st themselves as cars speed and race loudly"], "sample_ids": ["y2bVZ7rz-5M", "uZesmtKZGSw"], "start_seconds": ["280", "250"], "properties": ["engine, horn, siren", "men, talk, cars"], "captions_pred_video": ["footage of a parade of fire trucks driving down the street", "1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 
1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet holden monaro gts 1970 chevrolet"], "captions_pred_audio": ["a truck is honking its horn and a siren is blaring ", "a man is speaking and a car is revving with laughter in the background "], "question": "which entity is a vehicle", "label": 0}, {"captions": ["water flows as men speak and yell", "someone is typing on a computer keyboard"], "sample_ids": ["vJ7JPEFhyLA", "v0x1odnXtP0"], "start_seconds": ["16", "210"], "properties": ["water, flow, men", "keyboard, type, computer"], "captions_pred_video": ["a man in a red shirt paddling a kayak in the water", "how to make money on youtube in spanish"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a person is typing on a keyboard"], "question": "which is a still image", "label": 1}, {"captions": ["continuous snoring", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["sLkeqCDJIyw", "yajyRTUQk3U"], "start_seconds": ["120", "400"], "properties": ["loud, snoring, noise", "a woman, something, fried"], "captions_pred_video": [", what is the man doing on the couch? 
sleeping", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a person is snoring loudly", "a woman is speaking while food is frying in the background"], "question": "which entity is a video", "label": 1}, {"captions": ["two men speak as a buffeting wind blows", "a stream of water runs briefly"], "sample_ids": ["y8WEcpOlT3I", "x-PeY8Yb8M4"], "start_seconds": ["40", "300"], "properties": ["wind, speak, buffeting", "stream, water, run"], "captions_pred_video": ["on how to use a sewing machine youtube", "a man sitting on a rock in the middle of a river"], "captions_pred_audio": ["a man is speaking with wind noise in the background ", "a car is driving on a wet road "], "question": "which entity is moving", "label": 1}, {"captions": ["a horn rings out as a machine runs by", "water pouring and bubbling"], "sample_ids": ["slZLHwNbbt4", "uyRfq-jKPpo"], "start_seconds": ["300", "50"], "properties": ["a, horn, run", "water, bubbles, pouring"], "captions_pred_video": ["footage of a train coming down the tracks on a sunny day", "on youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro puff hairstyle youtube how to do an afro pu"], "captions_pred_audio": ["a train is moving and blowing its horn with a clickety-clack sound ", "water is running from a faucet"], "question": "which entity is a liquid", "label": 1}, {"captions": ["an adult woman and an 
adult man speak", "a frog vocalizes as birds chirp"], "sample_ids": ["zTLVJCo4WEE", "wqUmIEzuNz4"], "start_seconds": ["30", "30"], "properties": ["two people, adult, speak", "frog, bird, vocalize"], "captions_pred_video": ["- a boy with a rifle aiming at a target", "a frog sitting in the grass on a sunny day"], "captions_pred_audio": ["a woman speaks and crickets chirp", "a cat meows and rustles"], "question": "which entity is a single entity", "label": 1}, {"captions": ["children cry and people talk", "a jet engine roars, almost making a man inaudible"], "sample_ids": ["xLwHe825Zs", "xfaoyyzw2WU"], "start_seconds": ["18", "180"], "properties": ["people talk, children cry, people talk", "loud, jet engine, roar"], "captions_pred_video": [null, "footage of an airplane on the tarmac at an airport"], "captions_pred_audio": ["a baby cries and a woman speaks", "an aircraft engine roars and a man speaks "], "question": "which entity is louder", "label": 1}, {"captions": ["a man speaks as water trickles down a stream", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sapQIQUhFc", "sSMl2vc3ek"], "start_seconds": ["280", "20"], "properties": ["water, stream, trickles", "loud, multiple, distance"], "captions_pred_video": [null, null], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["the clinking of a train bell with the humming of an engine and a train horn blowing", "a man speaks as a vehicles passes by then a woman speaks"], "sample_ids": ["zgUgkpk78xU", "siJFXfGWgDk"], "start_seconds": ["70", "50"], "properties": ["clinking, humming, horn", "man, woman, vehicle"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 
1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", "footage of a beekeeper working with bees in a beehive"], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking and birds are chirping in the background "], "question": "which entity is a video of a man speaking?", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "an airplane engine runs"], "sample_ids": ["vs65y4qmyBE", "yVPZ2MNWpms"], "start_seconds": ["340", "0"], "properties": ["engine, run, man", "engine, airplane, runs"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "footage of an airport with planes parked on the tarmac"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a car is driving by on the road "], "question": "which entity has a running engine", "label": 1}, {"captions": ["a machine engine runs and a man speaks", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["vs65y4qmyBE", "vfYTJq7nU"], "start_seconds": ["340", "130"], "properties": ["engine, run, man", "rustling, ducks, quack"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", null], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a duck quacks and a woman speaks"], "question": "which entity is about a machine?", "label": 0}, {"captions": ["a man talks nearby and another man talks far away while some liquid flows", "continuous sneezing together with speech"], "sample_ids": ["sapQIQUhFc", "x4dZyf9Gbj0"], "start_seconds": ["280", "130"], "properties": ["liquid, flow, distance", "continuous, sneeze, speech"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking and a stream is flowing in the background ", "a woman sneezes and speaks"], "question": "which entity is more like a 
sneeze", "label": 1}, {"captions": ["a child yells and another yells", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["vMDHu7Lxcgw", "w5W5Kqtc8E"], "start_seconds": ["410", "100"], "properties": ["two, yell, child", "wind, blow, vehicle"], "captions_pred_video": ["a boy playing on a trampoline in the backyard", null], "captions_pred_audio": ["a woman is speaking and a child is shouting", "a motorboat is moving and people are shouting and cheering "], "question": "which entity has more than one person yelling?", "label": 0}, {"captions": ["an engine idles consistently before sputtering some", "gunshots ring out, a man yells, and more shots follow"], "sample_ids": ["rwTERCUno", "vKrYfzleLB8"], "start_seconds": ["90", "110"], "properties": ["engine, idle, sputter", "a, ring, gunshots"], "captions_pred_video": [null, "stock footage of a person holding a gun in their hand"], "captions_pred_audio": ["an engine is idling and vibrating", "a man is speaking with background noise and a cap gun is fired "], "question": "which entity is more violent", "label": 1}, {"captions": ["a airplane flies overhead as a woman speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zj2R0XoFr5k", "wz7N8YRy74I"], "start_seconds": ["50", "30"], "properties": ["airplane, fly, woman", "rooster, crow, background, men"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster in the background?", "label": 1}, {"captions": ["metal rumbles followed by a kid giggles then more metal rumbling followed by a guy speaking", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sQGXqGcwOTc", 
"zj2R0XoFr5k"], "start_seconds": ["3", "50"], "properties": ["audio, kid, giggles", "airplane, boy, fly"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video?", "label": 1}, {"captions": ["people speak softly as food sizzles", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["yhQ2Lg-7qDY", "ukg5L09Wpvo"], "start_seconds": ["130", "150"], "properties": ["food, sizzle, speak", "clickety-clack, train, whistle"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a train blows its whistle and blows its horn "], "question": "which entity is a train?", "label": 1}, {"captions": ["heavy rain splashes as it falls", "multiple insects buzz over rustling wind"], "sample_ids": ["wP8ZKrlx3oA", "tMJne1a4AFI"], "start_seconds": ["40", "0"], "properties": ["fall, rain, splash", "wind, buzz, rustling"], "captions_pred_video": ["footage of a flooded street in the middle of a desert with mountains in the background", "a swarm of bees on the ground"], "captions_pred_audio": ["a heavy rain is falling on a surface", "a swarm of bees buzzing around"], "question": "which entity is not a splash", "label": 1}, {"captions": ["an engine runs loudly", "water flows and trickles"], "sample_ids": ["vqZuVbG6-HI", "tB7hWb9gTuQ"], "start_seconds": ["130", "30"], "properties": ["loud, engine, run", "water, flow, trickle"], "captions_pred_video": ["footage is blurry because it's raining outside", "the rocks on the beach are surrounded by water and the sky is visible in the background"], 
"captions_pred_audio": ["a lawn mower is running and men are speaking ", "water is splashing and gurgling"], "question": "which entity is quieter", "label": 1}, {"captions": ["birds chirp and a dog breathes heavily", "a young female speaks, followed by spraying and a female screaming"], "sample_ids": ["y2ZBGpgbhHM", "uYT5gxnyMWM"], "start_seconds": ["30", "50"], "properties": ["dog, chirp, breathe", "female, spraying, scream"], "captions_pred_video": [null, "footage of a person spraying paint on the ceiling"], "captions_pred_audio": ["birds chirping and a dog panting", "a woman is speaking and a baby is crying"], "question": "which entity is a person", "label": 1}, {"captions": ["a motor runs in the distance as a soft wind periodically gusts", "television program is played far away while a woman talks and birds tweet nearby"], "sample_ids": ["xyL9F5VrjkE", "vbZ-0lGPneg"], "start_seconds": ["20", "30"], "properties": ["wind, motor, distance", "a woman, a television program, a bird"], "captions_pred_video": ["of a caterpillar truck loading logs into a trailer", "of a man holding a baby duck in his hands"], "captions_pred_audio": ["the wind is blowing and a car is passing by ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a television program?", "label": 1}, {"captions": ["loud ringing of a telephone stops followed by a man speaking and a digital beep", "someone whistles a tune"], "sample_ids": ["uzQnlJXBbOM", "sIXTftIuUgw"], "start_seconds": ["50", "90"], "properties": ["ringing, beep, stop", "someone, tune, whistle"], "captions_pred_video": ["footage of a person using a cell phone on a table", null], "captions_pred_audio": ["a telephone rings and a man speaks", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a cat meows as a young woman speaks", "birds chirp and wind blows"], "sample_ids": ["x5cuQjOdM3E", "sxIvBMSavMQ"], "start_seconds": ["30", "210"], "properties": 
["cat, meows, young woman", "birds, chirp, wind"], "captions_pred_video": ["a black background with an airplane flying in the sky", "beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a beehive beekeeping 101 how to extract honey from a"], "captions_pred_audio": ["a cat meows and a woman speaks", "birds are chirping and insects are buzzing"], "question": "which entity is more quiet", "label": 1}, {"captions": ["a toilet flushes and water drains", "females talk and laugh over gusting wind"], "sample_ids": ["sfAvvZwdLCY", "un9VQlzgZM"], "start_seconds": ["20", "5"], "properties": ["water drains, flushes, water", "females, talk, laugh"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["birds fly and flutter around", "winds blows roughly as a vehicle races past"], "sample_ids": ["wGKgwOP3h30", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["fly, flutter, around", "wind, blows, vehicle"], "captions_pred_video": ["of the pigeons in the coop", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["pigeons coo and flap their wings", "a jet engine roars and wind blows "], "question": "which entity is moving faster", "label": 1}, 
{"captions": ["a race car approaches quickly and slows down squealing tires", "a duck quacks continuously"], "sample_ids": ["sEprKHm8Sj8", "vh30P49Po6s"], "start_seconds": ["90", "30"], "properties": ["car, tires, slows", "quacks, continuously, duck"], "captions_pred_video": ["rally 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a duck is quacking loudly"], "question": "which entity is a bird?", "label": 1}, {"captions": ["an engine runs and wind blows", "a vehicle accelerates before a race car idles then accelerates quickly"], "sample_ids": ["vs65y4qmyBE", "sjlVMgdGSK0"], "start_seconds": ["340", "30"], "properties": ["engine, run, wind", "accelerates, vehicle, race car"], "captions_pred_video": ["a car is engulfed in flames on the side of the road", "a 1965 ford falcon drag racing at 100mph on a 1/8 mile track"], "captions_pred_audio": ["a heavy engine is running and men are speaking ", "a car accelerates and revs its engine "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a siren comes to life as a horn blares", "some tunes played by whistling"], "sample_ids": ["u--KhUW8l1Y", "u6BnG6YZqJ4"], "start_seconds": ["0", "0"], "properties": ["horn, siren, life", "tune, play, whistling"], "captions_pred_video": ["a firefighter spraying water from a fire hydrant at night", "a young boy standing in front of a group of kids in a classroom"], "captions_pred_audio": ["a fire truck siren blares and a horn blows ", "a person whistling a song"], "question": "which entity is a musical instrument", "label": 1}, {"captions": ["a duck quacks loudly and continuously", "an insect buzzes around continuously"], "sample_ids": 
["vh30P49Po6s", "v25l1jef3JY"], "start_seconds": ["30", "0"], "properties": ["loud, continuous, quacks", "buzzes, continuously, insect"], "captions_pred_video": ["of a man brushing his teeth with a toothbrush in his mouth", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a duck is quacking loudly", "a fly is buzzing around a microphone "], "question": "which entity is quieter", "label": 1}, {"captions": ["a horn honks followed by a loud continuous buzzing while men speak", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["wsHBIgzs9Fs", "ukg5L09Wpvo"], "start_seconds": ["50", "150"], "properties": ["horn, continuous, buzzing", "clickety-clack, train, whistle"], "captions_pred_video": ["shows a motorcycle riding down a country road with a motorcycle in the foreground", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a car accelerates and revs its engine while a man speaks ", "a train blows its whistle and blows its horn "], "question": "which entity is continuous", "label": 1}, {"captions": ["a man talks while a clock does ticktock", "an insect buzzes around continuously"], "sample_ids": ["spYNpeN7rPY", "v25l1jef3JY"], "start_seconds": ["1", "0"], "properties": ["a clock, ticktock, man", "buzzes, continuously, insect"], "captions_pred_video": ["in 10 words or less what is the name of the song in the maybank advertisement? 
maybank advertisement, maybank, advertisement, advertisements, advertisement video, advertisements video, advertisements, advertisement, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertisement video, advertis", "a black background with a cartoon character in the foreground"], "captions_pred_audio": ["a man is speaking and breathing with background noise ", "a fly is buzzing around a microphone "], "question": "which entity is moving", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "an audience gives applause as a man yells and a group sings"], "sample_ids": ["w2bYrCVLT60", "tdWhHV3X25Q"], "start_seconds": ["120", "60"], "properties": ["ducks, speak, quack", "applause, audience, yells"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", "a man is talking to another man on a stage in front of a microphone"], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a man is speaking and a crowd is clapping"], "question": "which entity is more active", "label": 1}, {"captions": ["a goat screams and people speak in the background", "tapping occurs then a baby cries"], "sample_ids": ["xC8kbrKJmco", "wIJK3-5y0kA"], "start_seconds": ["0", "30"], "properties": ["background, goat, 
scream", "a, cry, baby"], "captions_pred_video": [null, "of a baby playing with a cat in a dark room"], "captions_pred_audio": ["a goat is bleating ", "a baby cries and a woman speaks"], "question": "which entity is crying", "label": 1}, {"captions": ["a few ducks quack and scamper and a man speaks", "someone whistles a tune"], "sample_ids": ["w2bYrCVLT60", "sIXTftIuUgw"], "start_seconds": ["120", "90"], "properties": ["ducks, speak, quack", "someone, tune, whistle"], "captions_pred_video": ["of the ducks drinking from a pink pool in the grass", null], "captions_pred_audio": ["ducks are quacking and a man is speaking", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["a person is snoring while sleeping", "several insects fly while two men talk"], "sample_ids": ["vJrjSeP17yE", "s-T9OVOiMLo"], "start_seconds": ["40", "330"], "properties": ["a person is sleeping, snoring, person", "several, fly, men"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a person snoring loudly", "a man is speaking while insects are buzzing in the background "], "question": "which entity is a person", "label": 0}, {"captions": ["a woman speaks and is crumpling paper", "several beeps are followed by a hit and a woman talking"], "sample_ids": ["xvDdE3zNf8Y", "w34HjHr6gAY"], "start_seconds": ["120", "30"], "properties": ["A, crumple, paper", "beeps, hit, woman"], "captions_pred_video": ["of a woman in a white shirt and glasses holding a purple tie", "the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the 
wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz"], "captions_pred_audio": ["a woman speaks and crumples paper", "a beep sounds followed by a child speaking"], "question": "which entity is a video of a woman speaking and crumpling paper?", "label": 0}, {"captions": ["birds chirp quietly and an adult man speaks", "winds blows roughly as a vehicle races past"], "sample_ids": ["zuua6-5goWw", "xjvTpk2Zpr8"], "start_seconds": ["30", "70"], "properties": ["birds, chirp, quiet, man, speaks", "wind, blows, vehicle"], "captions_pred_video": ["in your own words screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot 10 of 10] screenshot", "footage of a dhl plane landing on the runway"], "captions_pred_audio": ["birds are chirping and a man is speaking with background noise ", "a jet engine roars and wind blows "], "question": "which entity is more calm", "label": 0}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "a man speaks as a motor runs in the background"], "sample_ids": ["vuUVPzd2FXw", "xZepNM9qcRA"], "start_seconds": ["160", "30"], "properties": ["a, steam, release", "background, motor, run"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a man speaks while a motorcycle revs and 
accelerates "], "question": "which entity has a motor running in the background?", "label": 1}, {"captions": ["wind noise makes sound into a microphone", "pigeons vocalize and birds chirp"], "sample_ids": ["w8uLijTqtlU", "uiS58TNyUiw"], "start_seconds": ["70", "430"], "properties": ["wind, microphone, noise", "vocalize, bird, chirp"], "captions_pred_video": ["footage is blurry and shaky", "of the pigeon in the cage"], "captions_pred_audio": ["the wind is blowing strongly", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a speedboat passes quickly on the water", "water is sprayed across a hard surface"], "sample_ids": ["tjmoSi330GM", "sQwlkXjQabo"], "start_seconds": ["23", "10"], "properties": ["speed, water, boat", "water, spray, surface"], "captions_pred_video": ["a person riding a jet ski on a lake with trees in the background", "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a motorboat speeds through water with wind noise ", "spraying followed by silence"], "question": "which entity is moving across a hard surface", "label": 1}, {"captions": ["paper is crumpling consistently", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["v5cSxLaHADY", "wDVMhEdTiVw"], "start_seconds": ["0", "30"], "properties": ["paper is crumpling, paper is white, paper is crumpling", "gun, shoot, water"], "captions_pred_video": ["footage of the person holding a pair of scissors", "a blurry image of trees and water in the forest"], "captions_pred_audio": ["paper is crumpled and crinkled", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is not a video of a gun shooting?", "label": 0}, {"captions": ["an electric engine works nearby followed by a child talking", "people cheer as a vehicle engine revs"], "sample_ids": ["xSKJGCItUWE", "xjhAnI2q6hM"], "start_seconds": ["10", "6"], "properties": ["engine, work, child", "engine revs, vehicle, 
people"], "captions_pred_video": ["footage of the helicopter flying in the room", "a school bus decorated with christmas lights is floating in the water"], "captions_pred_audio": ["a high pitched engine is running and a child speaks", "a truck is revving its engine and a man is speaking "], "question": "which entity is about a vehicle engine?", "label": 1}, {"captions": ["water is sprayed across a hard surface", "a man speaks followed by another man speaking outside"], "sample_ids": ["sQwlkXjQabo", "viuTg1M-dqg"], "start_seconds": ["10", "30"], "properties": ["water, spray, surface", "two men, speak, follow"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["a vehicle accelerates before a race car idles then accelerates quickly", "pigeons vocalize and birds chirp"], "sample_ids": ["sjlVMgdGSK0", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["accelerates, vehicle, race car", "vocalize, bird, chirp"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "of the pigeon in the cage"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["a stream of water flows quickly", "loud intermittent buzzing with intermittent laughter"], "sample_ids": ["wbHTKEJZyhc", "sLUnaPT5gM8"], "start_seconds": ["20", "0"], "properties": ["stream, water, flow", "loud, laughter, intermittent"], "captions_pred_video": ["footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the 
background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and a bridge in the background video footage of a river in autumn with trees and", "of a baby laying on his stomach in a blue shirt and diaper"], "captions_pred_audio": ["a waterfall is flowing and people are speaking ", "a baby is laughing and breathing while a man is speaking "], "question": "which entity is more quiet", "label": 0}, {"captions": ["a clock ticks quietly and rhythmically", "a stream of water flows as people talk and wind blows"], "sample_ids": ["u7C-AEBQM", "xBxDz0CFVn0"], "start_seconds": ["30", "30"], "properties": ["ticks, rhythmic, quiet", "stream, water, flow"], "captions_pred_video": [null, "footage is blurry and out of focus"], "captions_pred_audio": ["a ticktock of a clock", "a man is speaking with wind noise in the background "], "question": "which entity is moving", "label": 1}, {"captions": ["a man talks followed by a woman shouting", "water is sprayed across a hard surface"], "sample_ids": ["s3cTDAj31g", "sQwlkXjQabo"], "start_seconds": ["80", "10"], "properties": ["man, talk, woman", "water, spray, surface"], "captions_pred_video": [null, "a close-up of a red car with water droplets on the hood"], "captions_pred_audio": ["a 
man is speaking and a baby is crying", "spraying followed by silence"], "question": "which entity is a liquid", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "a loud snarling engine is followed by a man laughing"], "sample_ids": ["sfAvvZwdLCY", "zl9Dqx-j7q4"], "start_seconds": ["20", "6"], "properties": ["flushes, drains, water", "engine, laugh, loud"], "captions_pred_video": ["footage of the toilet in the bathroom", "footage of a man driving a car in the dark"], "captions_pred_audio": ["a toilet is flushed", "a jet engine roars "], "question": "which entity is louder", "label": 1}, {"captions": ["a woman speaks in a fast tone with a male", "someone whistles a tune"], "sample_ids": ["sTpirNYo8vQ", "sIXTftIuUgw"], "start_seconds": ["30", "90"], "properties": ["a, tone, fast", "someone, tune, whistle"], "captions_pred_video": ["of a man taking a selfie on a bus", null], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a person whistling a song"], "question": "which entity is a musical performance", "label": 1}, {"captions": ["people clap and speak in the distance", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["wwyfGO2J4", "tDVADusiIoc"], "start_seconds": ["90", "60"], "properties": ["clap, distance, speak", "water, radio, man"], "captions_pred_video": [null, "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaking together with birds chirping and distant murmuring", "water splashes and wind noise is made into a microphone"], "sample_ids": ["uiS58TNyUiw", "sDSppXIlJrs"], "start_seconds": ["430", "27"], "properties": ["audio, man, speaking", "microphone, water, wind"], "captions_pred_video": ["of the pigeon in the cage", "a man is 
paddling a small wooden boat in the water"], "captions_pred_audio": ["a man is speaking and a bee is buzzing", "the wind is blowing and water is splashing"], "question": "which entity is a recording of a man speaking?", "label": 0}, {"captions": ["water is sprayed across a hard surface", "pigeons vocalize and birds chirp"], "sample_ids": ["sQwlkXjQabo", "uiS58TNyUiw"], "start_seconds": ["10", "430"], "properties": ["water, spray, surface", "vocalize, bird, chirp"], "captions_pred_video": ["a close-up of a red car with water droplets on the hood", "of the pigeon in the cage"], "captions_pred_audio": ["spraying followed by silence", "a man is speaking and a bee is buzzing"], "question": "which entity is not a bird?", "label": 0}, {"captions": ["birds chirp as a bell rings", "multiple people speak and children yell while water gurgles"], "sample_ids": ["ziUT9IFTkjg", "vb1fPSDI4c"], "start_seconds": ["10", "30"], "properties": ["chirp, bell, ring", "multiple, people, yell"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a crowd of people are talking and laughing"], "question": "which entity is more quiet", "label": 0}, {"captions": ["water flows as a woman laughs and a man speaks", "females talk and laugh over gusting wind"], "sample_ids": ["vddP56-ogds", "un9VQlzgZM"], "start_seconds": ["30", "5"], "properties": ["water, flow, laugh", "females, talk, laugh"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running and gurgling and a man is speaking", "a woman is speaking and laughing with wind noise and breathing in the background "], "question": "which entity has more people laughing", "label": 1}, {"captions": ["an insect buzzes around continuously", "a woman speaks followed by another woman whimpering and speaking"], "sample_ids": ["v25l1jef3JY", "xOZfdgAgJ9o"], "start_seconds": ["0", "40"], "properties": ["buzzes, continuously, insect", "woman, whimpering, speaking"], 
"captions_pred_video": ["a black background with a cartoon character in the foreground", "footage of a woman talking to a man in a doctor's office"], "captions_pred_audio": ["a fly is buzzing around a microphone ", "a woman is speaking and a baby is crying"], "question": "which entity is speaking", "label": 1}, {"captions": ["birds chirp then an animal grunts", "a man speaks as a car is passing by"], "sample_ids": ["tDlysoZiA1I", "sK4u5T8hW78"], "start_seconds": ["0", "30"], "properties": ["animal, grunt, chirp", "a, car, pass"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a moving object", "label": 1}, {"captions": ["water splashes as an animal walks through", "small dogs yip and bark sharply"], "sample_ids": ["w1ir-sZ3Im8", "v-wcQf4BDY0"], "start_seconds": ["90", "120"], "properties": ["animal, water, splashes", "bark, yip, sharply"], "captions_pred_video": ["footage of a group of people riding horses through a river", "footage is blurry and shaky, making it difficult to see what is happening"], "captions_pred_audio": ["water splashes and gurgles as people speak", "a dog barks and growls"], "question": "which animal is more active", "label": 1}, {"captions": ["a woman speaks over sizzling noise", "a man speaks over a radio as wind blows and water splashes"], "sample_ids": ["yajyRTUQk3U", "tDVADusiIoc"], "start_seconds": ["400", "60"], "properties": 
["noise, woman, speak", "water, radio, man"], "captions_pred_video": ["- a woman cooking in the kitchen", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking over a radio?", "label": 1}, {"captions": ["people talk quietly in the distance, followed by a police car siren wailing", "vehicles pass by on a roadway"], "sample_ids": ["wy1eKjR7KC0", "tgbONvsP47Y"], "start_seconds": ["30", "0"], "properties": ["people, talk, distance", "pass, vehicle, roadway"], "captions_pred_video": ["two police officers riding motorcycles down the street", "footage of a fire truck entering a garage"], "captions_pred_audio": ["a man is speaking and a siren is going off", "a car is driving on the road "], "question": "which entity is more likely to be in motion", "label": 1}, {"captions": ["cats meow and then a person begins to talk while the cats continue to meow", "pigeons vocalize and birds chirp"], "sample_ids": ["x5cuQjOdM3E", "uiS58TNyUiw"], "start_seconds": ["30", "430"], "properties": ["cat, talk, meow", "vocalize, bird, chirp"], "captions_pred_video": ["a black background with an airplane flying in the sky", "of the pigeon in the cage"], "captions_pred_audio": ["a cat meows and a woman speaks", "a man is speaking and a bee is buzzing"], "question": "which entity is a bird", "label": 1}, {"captions": ["loud clanking and banging with brief male speech", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["sWZzXuWYY", "su6FAOcOA8c"], "start_seconds": ["420", "4"], "properties": ["male, speech, banging", "engine, idle, woman"], "captions_pred_video": [null, "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a sewing machine runs and a man speaks", "a woman is speaking and a subway train is moving "], "question": 
"which entity is a bus?", "label": 1}, {"captions": ["a woman and man speak while food is frying", "water splashes as an animal walks through"], "sample_ids": ["zk-xJGQU8-4", "w1ir-sZ3Im8"], "start_seconds": ["130", "90"], "properties": ["food, man, woman", "animal, water, splashes"], "captions_pred_video": ["a man and a woman cooking in a wok on the stove", "footage of a group of people riding horses through a river"], "captions_pred_audio": ["a woman is speaking while dishes are clanging and music is playing in the background ", "water splashes and gurgles as people speak"], "question": "which entity is about a person", "label": 0}, {"captions": ["a consistent ticking pattern", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sCeWURVHfOM", "sSMl2vc3ek"], "start_seconds": ["30", "20"], "properties": ["ticking, pattern, clock", "loud, multiple, distance"], "captions_pred_video": ["- a close-up view of the clock's inner workings", null], "captions_pred_audio": ["ticking of a clock", "a person snoring loudly"], "question": "which entity is not a clock?", "label": 1}, {"captions": ["people converse in the distance as a clock ticks", "small dogs growl, bark and yip."], "sample_ids": ["vZAw4apG0Es", "sShpyu2l4YQ"], "start_seconds": ["30", "0"], "properties": ["people, clock, converse", "growl, bark, yip"], "captions_pred_video": ["a clock made out of wood and gears with birds on top of it", "the puppies are playing with a toy"], "captions_pred_audio": ["a clock is ticking and people are talking", "a dog is barking and growling"], "question": "which entity is more active", "label": 1}, {"captions": ["a man speaks and wind blows as an aircraft engine becomes louder", "a toilet flushes and a female speaks"], "sample_ids": ["sofxkNWaP0s", "yaln9y8I7ms"], "start_seconds": ["30", "230"], "properties": ["wind, engine, louder", "female, flushes, toilet"], "captions_pred_video": ["of the airplane taking off from the runway with the speed limit sign 
in the foreground", "footage is blurry and out of focus"], "captions_pred_audio": ["a man is speaking while a jet engine roars in the background ", "a toilet flushes and a man speaks"], "question": "which entity is a bathroom?", "label": 1}, {"captions": ["a car speeds away loudly followed by a car revving loudly and driving away while outside", "three men talk while wind blows and some liquid flows"], "sample_ids": ["sjlVMgdGSK0", "vJ7JPEFhyLA"], "start_seconds": ["30", "16"], "properties": ["car, revving, loudly", "three men, wind, flow"], "captions_pred_video": ["a 1965 ford falcon drag racing at 100mph on a 1/8 mile track", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["a car accelerates and revs its engine ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is not a car?", "label": 1}, {"captions": ["a small airplane approaches and then flies by, after and during which a boy speaks", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["zj2R0XoFr5k", "su6FAOcOA8c"], "start_seconds": ["50", "4"], "properties": ["airplane, boy, fly", "engine, idle, woman"], "captions_pred_video": ["footage of a small airplane flying in the sky stock videos and royalty-free footage", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a woman speaks while a helicopter flies overhead ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["a person is snoring while sleeping", "a frog croaks as other frogs croak in the background"], "sample_ids": ["vJrjSeP17yE", "yswmmRZFItk"], "start_seconds": ["40", "0"], "properties": ["a person is sleeping, snoring, person", "background, frog, croak"], "captions_pred_video": ["a black background with a small plane flying in the sky", "a close up of a frog in the water"], "captions_pred_audio": ["a person snoring loudly", "a frog is 
croaking"], "question": "which entity is a croaker", "label": 1}, {"captions": ["a vehicle engine revs and tires squeal", "an airplane engine spools and people speak"], "sample_ids": ["yDoT73BWsdA", "wTjoRj1se3U"], "start_seconds": ["10", "390"], "properties": ["engine revs, tires squeal, vehicle", "airplane, engine, spool"], "captions_pred_video": ["a man driving a race car with a helmet on the steering wheel", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a race car accelerates and revs its engine ", "a jet engine is running and people are talking"], "question": "which entity is a vehicle", "label": 0}, {"captions": ["multiple beeps are followed by a squawk and a child speaking", "a man speaks as a car is passing by"], "sample_ids": ["w34HjHr6gAY", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["beeps, squawk, child speaking", "a, car, pass"], "captions_pred_video": ["the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz 1995 the wizard of oz", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["a beep sounds followed by a child speaking", "a man is speaking with background noise and breathing 
sounds "], "question": "which entity is a man speaking to a car?", "label": 1}, {"captions": ["birds chirp and objects are moved around", "three men talk while wind blows and some liquid flows"], "sample_ids": ["yPUYU6t3rwo", "vJ7JPEFhyLA"], "start_seconds": ["370", "16"], "properties": ["birds chirp, objects are moved around, birds", "three men, wind, flow"], "captions_pred_video": ["footage and stock-footage/b-roll of a beekeeper opening a beehive", "a man in a red shirt paddling a kayak in the water"], "captions_pred_audio": ["insects buzz and a man speaks", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is about moving objects around", "label": 0}, {"captions": ["a machine beeps continuously", "a gun shoots, followed by water sloshing nearby"], "sample_ids": ["y682ml90jGw", "wDVMhEdTiVw"], "start_seconds": ["11", "30"], "properties": ["beeps, machine, continuously", "gun, shoot, water"], "captions_pred_video": [null, "a blurry image of trees and water in the forest"], "captions_pred_audio": ["a beeping sound is being made ", "a gun is fired followed by splashing and a person sneezing"], "question": "which entity is more likely to be used in a war", "label": 1}, {"captions": ["people speaking indiscriminately in the distance with a person snoring loudly nearby", "a duck quacks loudly and continuously"], "sample_ids": ["w2JXXIAdUdg", "vh30P49Po6s"], "start_seconds": ["10", "30"], "properties": ["snoring, distance, person", "loud, continuous, quacks"], "captions_pred_video": ["a close up shot of a person's mouth with a toothbrush in it", "of a man brushing his teeth with a toothbrush in his mouth"], "captions_pred_audio": ["a person snoring and a dog whimpering", "a duck is quacking loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["running water in a faucet with some clinks", "a man speaks followed by another man speaking outside"], "sample_ids": ["zNRChLjqcU", "viuTg1M-dqg"], 
"start_seconds": ["220", "30"], "properties": ["water, faucet, run", "two men, speak, follow"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["water is running from a faucet into a sink", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a video of two men speaking?", "label": 1}, {"captions": ["a vehicle engine runs while a woman makes an announcement", "a train horn blows as it passes by"], "sample_ids": ["su6FAOcOA8c", "zVacuqSb4LI"], "start_seconds": ["4", "30"], "properties": ["engine, run, woman", "horn, blows, train"], "captions_pred_video": ["shows a group of people riding on a crowded subway train", "by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by mike amstong a video by"], "captions_pred_audio": ["a woman is speaking and a subway train is moving ", "a train whistle blows and a train passes by with a whistle blowing "], "question": "which entity is a vehicle?", "label": 0}, {"captions": ["a baby cries and fusses, a woman speaks, and a man speaks", "several insects fly while two men talk"], "sample_ids": ["wyllXV6PjKo", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["a baby, a woman, a man", "several, fly, men"], "captions_pred_video": [null, "a man climbing up a tree 
using a rope to reach the top of the tree"], "captions_pred_audio": ["a woman speaks and a baby cries", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more people", "label": 0}, {"captions": ["a man speaks as a car is passing by", "several insects fly while two men talk"], "sample_ids": ["sK4u5T8hW78", "s-T9OVOiMLo"], "start_seconds": ["30", "330"], "properties": ["a, car, pass", "several, fly, men"], "captions_pred_video": ["for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking with background noise and breathing sounds ", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more moving parts", "label": 1}, {"captions": ["people speak in a closed space", "a man speaks on a radio as wind blows"], "sample_ids": ["sTpirNYo8vQ", "tDVADusiIoc"], "start_seconds": ["30", "60"], "properties": ["people, space, speak", "man, radio, blows"], "captions_pred_video": ["of a man taking a selfie on a bus", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a man is speaking while a car is revving and accelerating ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is in a closed space", "label": 0}, {"captions": ["a man speaks as birds chirp and a vehicle passes nearby", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["siJFXfGWgDk", "wz7N8YRy74I"], "start_seconds": ["50", "30"], "properties": ["a, 
bird, vehicle", "rooster, crow, background, men"], "captions_pred_video": ["footage of a beekeeper working with bees in a beehive", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking and birds are chirping in the background ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a bird in it?", "label": 0}, {"captions": ["a horn blasts as warning bells ring", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["zgUgkpk78xU", "tiDFTC-5vU"], "start_seconds": ["70", "30"], "properties": ["horn, bells, ring", "male, duck, laugh"], "captions_pred_video": ["of a train passing through a small town on a sunny day at 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 1080p 60fps 108", null], "captions_pred_audio": ["a train blows its horn as it speeds down the tracks ", "a man is speaking and ducks are quacking"], "question": "which entity is a warning", "label": 0}, {"captions": ["water splashing and wind blowing as a powerful engine roars", "water drips and bubbles as a man speaks"], "sample_ids": ["yZmhM1HcsyE", "vSeGhaZt-aI"], "start_seconds": ["4", "50"], "properties": ["engine, roar, water", "water, bubbles, speak"], "captions_pred_video": ["footage of a speedboat on a lake with water spraying from the back of the boat", "a man in a kitchen preparing a smoothie with a blender"], "captions_pred_audio": ["a motorboat speeds through water with wind noise in the background ", "a man is speaking and pouring liquid with background noise "], "question": "which entity is more calm", "label": 1}, {"captions": ["a man talks while metallic objects are rapped and steam is released", "several insects fly while two men talk"], "sample_ids": ["vuUVPzd2FXw", "s-T9OVOiMLo"], 
"start_seconds": ["160", "330"], "properties": ["a, steam, release", "several, fly, men"], "captions_pred_video": ["of the person cooking on the grill with a spatula", "a man climbing up a tree using a rope to reach the top of the tree"], "captions_pred_audio": ["a man is speaking and dishes are clanging", "a man is speaking while insects are buzzing in the background "], "question": "which entity has more flying insects", "label": 1}, {"captions": ["a loud snarling engine is followed by a man laughing", "a baby laughs giddily and a woman laughs then speaks"], "sample_ids": ["zl9Dqx-j7q4", "wjsXBsc7M40"], "start_seconds": ["6", "10"], "properties": ["engine, laugh, loud", "a baby laughs, a woman laughs, a woman speaks"], "captions_pred_video": ["footage of a man driving a car in the dark", "footage of the baby playing with a toothbrush"], "captions_pred_audio": ["a jet engine roars ", "a baby laughs and a woman speaks"], "question": "which entity is a human", "label": 1}, {"captions": ["a door opens and birds chirp", "a man speaks as a car is passing by"], "sample_ids": ["yeFvk9x0wWI", "sK4u5T8hW78"], "start_seconds": ["30", "30"], "properties": ["door, open, birds", "a, car, pass"], "captions_pred_video": ["a mouse in a cage on the sidewalk in front of a fence", "for 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai accent 1 6l 2007 hyundai"], "captions_pred_audio": ["birds chirp in the background as a car drives by ", "a man is speaking with background noise and breathing sounds "], "question": "which entity is a person", "label": 1}, {"captions": ["people converse as a motor runs and air brakes hiss", "television program is played 
far away while a woman talks and birds tweet nearby"], "sample_ids": ["zFjIWfSD-4", "vbZ-0lGPneg"], "start_seconds": ["410", "30"], "properties": ["People, motor, brakes", "a woman, a television program, a bird"], "captions_pred_video": [null, "of a man holding a baby duck in his hands"], "captions_pred_audio": ["a man is speaking while a car is driving and a ticking sound is heard ", "a woman is speaking and a dog is whimpering"], "question": "which entity has a bird?", "label": 1}, {"captions": ["bees buzz as wind blows", "a woman talks while something is fried and objects are tapped"], "sample_ids": ["tMJne1a4AFI", "yajyRTUQk3U"], "start_seconds": ["0", "400"], "properties": ["bees, buzz, wind", "a woman, something, fried"], "captions_pred_video": ["a swarm of bees on the ground", "- a woman cooking in the kitchen"], "captions_pred_audio": ["a swarm of bees buzzing around", "a woman is speaking while food is frying in the background"], "question": "which entity is a person", "label": 1}, {"captions": ["people speak softly as food sizzles", "an airplane engine spools and people speak"], "sample_ids": ["yhQ2Lg-7qDY", "wTjoRj1se3U"], "start_seconds": ["130", "390"], "properties": ["food, sizzle, speak", "airplane, engine, spool"], "captions_pred_video": ["a pan filled with meat and sauce being cooked on a stove top", "footage of a man playing with a remote control airplane in a field"], "captions_pred_audio": ["a faucet is running and a man is speaking", "a jet engine is running and people are talking"], "question": "which entity is about a plane?", "label": 1}, {"captions": ["music plays and a woman speaks on a radio before gunshots are fired", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["xKB8O8LTs6s", "tDlysoZiA1I"], "start_seconds": ["70", "0"], "properties": ["music, radio, gunshots", "animal, grunts, chirps"], "captions_pred_video": ["in your own words a screenshot of the game's loading screen", "'s main subject is a dog standing on 
top of a laptop"], "captions_pred_audio": ["music plays while a woman speaks and gunshots are fired ", "birds are chirping and a rooster is crowing "], "question": "which entity is more quiet", "label": 1}, {"captions": ["dogs barking and whimpering", "a person is snoring while sleeping"], "sample_ids": ["tIY7qOV3rEM", "vJrjSeP17yE"], "start_seconds": ["0", "40"], "properties": ["barking, whimpering, dog", "a person is sleeping, snoring, person"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "a black background with a small plane flying in the sky"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a person snoring loudly"], "question": "which entity is a person", "label": 1}, {"captions": ["a girl speaks followed by a scream and more girls talking", "music plays and someone speaks before gunfire and an explosion occurs"], "sample_ids": ["uYT5gxnyMWM", "xKB8O8LTs6s"], "start_seconds": ["50", "70"], "properties": ["a, scream, girl", "music, gunfire, explosion"], "captions_pred_video": ["footage of a person spraying paint on the ceiling", "in your own words a screenshot of the game's loading screen"], "captions_pred_audio": ["a woman is speaking and a baby is crying", "music plays while a woman speaks and gunshots are fired "], "question": "which entity has more gunfire", "label": 1}, {"captions": ["birds chirp and an insect buzzes around", "people converse as a motor runs and air brakes hiss"], "sample_ids": ["t97k0cejSQE", "zFjIWfSD-4"], "start_seconds": ["250", "410"], "properties": ["bird, chirp, insect", "People, motor, brakes"], "captions_pred_video": ["a bee on a purple thistle flower", null], "captions_pred_audio": ["a bee buzzes and a woman speaks", "a man is speaking while a car is driving and a ticking sound is heard "], "question": "which entity is more likely to be in a car", "label": 1}, {"captions": ["a motor slows to a stopover traffic noises", "a woman speaks as she rubs two objects 
together"], "sample_ids": ["zofjfKhqLk8", "vzxHnu-SFEw"], "start_seconds": ["10", "80"], "properties": ["noise, stop, motor", "two objects, woman, speak"], "captions_pred_video": ["footage of a man using a machine to cut a piece of wood", "how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to make a paper cup with scissors youtube how to"], "captions_pred_audio": ["a large engine is running and a bell is ringing", "a woman is speaking and breathing with mechanisms in the background "], "question": "which entity is a person", "label": 1}, {"captions": ["a muffled toilet flushes and the water drains", "dishes cling together then a man begins to speak"], "sample_ids": ["sfAvvZwdLCY", "sQGXqGcwOTc"], "start_seconds": ["20", "3"], "properties": ["flushes, drains, water", "cling, speak, dishes"], "captions_pred_video": ["footage of the toilet in the bathroom", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a toilet is flushed", "mechanisms are operating and water is splashing "], "question": "which entity is about water?", "label": 0}, {"captions": ["a man speaks on a radio as wind blows", "a series of light horn beeps is followed by a loud steam whistle"], "sample_ids": 
["tDVADusiIoc", "wnpJndXuxLc"], "start_seconds": ["60", "50"], "properties": ["man, radio, blows", "beeps, loud, whistle"], "captions_pred_video": ["a person riding on the back of a sailboat in rough seas", "footage of the train coming down the tracks on a snowy day"], "captions_pred_audio": ["a man is speaking while the wind is blowing and water is splashing", "a steam whistle blows and a train moves with wind noise in the background "], "question": "which entity is louder", "label": 1}, {"captions": ["wind blows and a vehicle blows a hard then a train blows a horn", "a car accelerates and wind blows"], "sample_ids": ["wnpJndXuxLc", "u0TrcHhkPQ"], "start_seconds": ["50", "20"], "properties": ["blows, vehicle, train", "accelerates, wind, blows"], "captions_pred_video": ["footage of the train coming down the tracks on a snowy day", null], "captions_pred_audio": ["a steam whistle blows and a train moves with wind noise in the background ", "a race car accelerates and revs its engine "], "question": "which entity is moving", "label": 1}, {"captions": ["goats bleat and metal clings", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["tH17JPjDPnc", "tiDFTC-5vU"], "start_seconds": ["260", "30"], "properties": ["bleat, metal, clings", "male, duck, laugh"], "captions_pred_video": ["feed of the goats eating hay in the barn", null], "captions_pred_audio": ["a cow is mooing and mechanisms are ticking ", "a man is speaking and ducks are quacking"], "question": "which entity is a person speaking?", "label": 1}, {"captions": ["a man speaks and a rooster crows while men talk in the background", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["wz7N8YRy74I", "wz7N8YRy74I"], "start_seconds": ["30", "30"], "properties": ["rooster, crow, background, men", "rooster, crow, background, men"], "captions_pred_video": ["footage of the sun shining through the clouds on a cloudy day", "footage of the sun shining through the clouds on 
a cloudy day"], "captions_pred_audio": ["a man is speaking birds are chirping and a rooster is crowing ", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity has a rooster crow while men talk in the background?", "label": 0}, {"captions": ["a crowd yells, reacts and applauds", "a man speaks as a motor runs in the background"], "sample_ids": ["wztCSUxOf8", "xZepNM9qcRA"], "start_seconds": ["130", "30"], "properties": ["a crowd, yells, applauds", "background, motor, run"], "captions_pred_video": [null, "a close-up view of the motorcycle's engine and exhaust system"], "captions_pred_audio": ["a man is speaking and a crowd is clapping", "a man speaks while a motorcycle revs and accelerates "], "question": "which entity is silent", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a man speaks on a radio as wind blows"], "sample_ids": ["zl9Dqx-j7q4", "tDVADusiIoc"], "start_seconds": ["6", "60"], "properties": ["motors rev, laugh, loudly", "man, radio, blows"], "captions_pred_video": ["footage of a man driving a car in the dark", "a person riding on the back of a sailboat in rough seas"], "captions_pred_audio": ["a jet engine roars ", "a man is speaking while the wind is blowing and water is splashing"], "question": "which entity is a man speaking on a radio?", "label": 1}, {"captions": ["women speak as water runs briefly, children call out, and a man speaks", "a vehicle engine accelerating then running on idle"], "sample_ids": ["uRExseg-0XI", "vYkA3cfXp5Q"], "start_seconds": ["210", "30"], "properties": ["woman, man, water", "engine, accelerate, idle"], "captions_pred_video": ["stock footage or video footage of a person stirring a pot on a stove with a long-handled wooden spoon", "footage of a car driving down the street on a sunny day"], "captions_pred_audio": ["a man is speaking while water is running and birds are chirping ", "an engine is idling"], "question": "which entity is a vehicle?", "label": 
1}, {"captions": ["a bird chirps in response to a woman chirping for the birds", "a person is burping while a girl speaks"], "sample_ids": ["uOpoD0gGXcs", "vdoxuJn9lTc"], "start_seconds": ["120", "40"], "properties": ["chirps, woman, bird", "person, burp, girl"], "captions_pred_video": ["a herd of cows grazing in the field", "a group of young girls playing a video game together in a living room"], "captions_pred_audio": ["birds are chirping and a man is speaking", "a child speaks followed by a burp"], "question": "which entity is a person?", "label": 1}, {"captions": ["birds chirp as a bell rings", "a clock ticktocks"], "sample_ids": ["ziUT9IFTkjg", "v-g-j2uTByM"], "start_seconds": ["10", "30"], "properties": ["chirp, bell, ring", "ticktocks, clock, ticktocks"], "captions_pred_video": [null, "in your own words a cuckoo clock hanging on the wall"], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a clock is ticking loudly"], "question": "which entity is silent", "label": 1}, {"captions": ["a stream of water runs briefly", "a bus engine idles while a woman speaks making an announcement"], "sample_ids": ["x-PeY8Yb8M4", "su6FAOcOA8c"], "start_seconds": ["300", "4"], "properties": ["stream, water, run", "engine, idle, woman"], "captions_pred_video": ["a man sitting on a rock in the middle of a river", "shows a group of people riding on a crowded subway train"], "captions_pred_audio": ["a car is driving on a wet road ", "a woman is speaking and a subway train is moving "], "question": "which entity is stationary", "label": 1}, {"captions": ["someone whistles a song", "an infant crying as a woman laughs"], "sample_ids": ["sIXTftIuUgw", "xhmRY9yhC7c"], "start_seconds": ["90", "20"], "properties": ["someone, song, whistle", "a, laugh, infant"], "captions_pred_video": [null, "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a person whistling a song", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, 
{"captions": ["dishes cling together then a man begins to speak", "water flows and trickles"], "sample_ids": ["sQGXqGcwOTc", "tB7hWb9gTuQ"], "start_seconds": ["3", "30"], "properties": ["cling, speak, dishes", "water, flow, trickle"], "captions_pred_video": ["a man washing dishes in a commercial kitchen", "the rocks on the beach are surrounded by water and the sky is visible in the background"], "captions_pred_audio": ["mechanisms are operating and water is splashing ", "water is splashing and gurgling"], "question": "which entity is a liquid", "label": 1}, {"captions": ["small dogs growl, bark and yip.", "a train whistle keeps going off while the clickety-clack of the train on the rails are continuous"], "sample_ids": ["sShpyu2l4YQ", "ukg5L09Wpvo"], "start_seconds": ["0", "150"], "properties": ["growl, bark, yip", "clickety-clack, train, whistle"], "captions_pred_video": ["the puppies are playing with a toy", "footage of a train passing through a forest on a dirt road"], "captions_pred_audio": ["a dog is barking and growling", "a train blows its whistle and blows its horn "], "question": "which entity is a train", "label": 1}, {"captions": ["a small engine spits as it runs", "a harsh wind blows as a man speaks and another man speaks"], "sample_ids": ["sZvwOuuPGP0", "y8WEcpOlT3I"], "start_seconds": ["50", "40"], "properties": ["spits, engine, runs", "harsh, wind, blows"], "captions_pred_video": ["of a bulldozer clearing a road in a forest stock footage and royalty-free videos", "on how to use a sewing machine youtube"], "captions_pred_audio": ["a medium engine is running ", "a man is speaking with wind noise in the background "], "question": "which entity is not a person?", "label": 0}, {"captions": ["a baby laugh at a sputter", "a small airplane approaches and then flies by, after and during which a boy speaks"], "sample_ids": ["sLUnaPT5gM8", "zj2R0XoFr5k"], "start_seconds": ["0", "50"], "properties": ["laugh, sputter, baby", "airplane, boy, fly"], 
"captions_pred_video": ["of a baby laying on his stomach in a blue shirt and diaper", "footage of a small airplane flying in the sky stock videos and royalty-free footage"], "captions_pred_audio": ["a baby is laughing and breathing while a man is speaking ", "a woman speaks while a helicopter flies overhead "], "question": "which entity is a video", "label": 1}, {"captions": ["a train engine runs and a horn blows", "a male speaks and another male speaks"], "sample_ids": ["zPX9o1uDiI", "viuTg1M-dqg"], "start_seconds": ["40", "30"], "properties": ["engine, horn, run", "two males, speaking, male"], "captions_pred_video": [null, "footage of water coming out of a hole in the ground"], "captions_pred_audio": ["a train moves with its horn blowing and wheels squealing ", "a man is speaking with background noise and breathing sounds "], "question": "which entity has more than one speaker", "label": 1}, {"captions": ["leaves rustle while man speaks", "an infant crying as a woman laughs"], "sample_ids": ["zOZleIRqZm4", "xhmRY9yhC7c"], "start_seconds": ["80", "20"], "properties": ["leaves, rustle, speak", "a, laugh, infant"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "of a baby crying in a baby bouncer"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a baby cries and a woman speaks"], "question": "which entity is a person", "label": 1}, {"captions": ["television program is played far away while a woman talks and birds tweet nearby", "paper folding and crinkling"], "sample_ids": ["vbZ-0lGPneg", "zPpG3RD8lSs"], "start_seconds": ["30", "20"], "properties": ["a woman, a television program, a bird", "paper, fold, crinkle"], "captions_pred_video": ["of a man holding a baby duck in his hands", "how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make 
a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's day card out of a piece of paper youtube how to make a valentine's"], "captions_pred_audio": ["a woman is speaking and a dog is whimpering", "the wind blows and a mouse clicks "], "question": "which entity is a demonstration of folding and crinkling?", "label": 1}, {"captions": ["motors rev and run loudly as a person laughs", "a toilet flushes and water drains unevenly"], "sample_ids": ["zl9Dqx-j7q4", "vhJWZheqaE"], "start_seconds": ["6", "0"], "properties": ["motors rev, laugh, loudly", "water drains unevenly, toilet flushes, water drains"], "captions_pred_video": ["footage of a man driving a car in the dark", null], "captions_pred_audio": ["a jet engine roars ", "a toilet is flushed"], "question": "which entity is a toilet?", "label": 1}, {"captions": ["running water in a faucet with some clinks", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["zNRChLjqcU", "w5W5Kqtc8E"], "start_seconds": ["220", "100"], "properties": ["water, faucet, run", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["water is running from a faucet into a sink", "a motorboat is moving and people are shouting and cheering "], "question": "which entity is a vehicle?", "label": 1}, {"captions": ["a man speaks then blows a vehicle horn as wind blows", "dishes cling together then a man begins to speak"], "sample_ids": ["zALy31PjDl0", "sQGXqGcwOTc"], "start_seconds": ["21", "3"], "properties": 
["a man, a vehicle, a horn", "cling, speak, dishes"], "captions_pred_video": ["a motorcycle is parked on the side of a brick walkway", "a man washing dishes in a commercial kitchen"], "captions_pred_audio": ["a man is speaking and a car horn is honking", "mechanisms are operating and water is splashing "], "question": "which entity is about a man speaking?", "label": 0}, {"captions": ["a jet engine screams, then increases its power", "a car accelerates and wind blows"], "sample_ids": ["vBslzh7saPw", "u0TrcHhkPQ"], "start_seconds": ["90", "20"], "properties": ["power, scream, increase", "accelerates, wind, blows"], "captions_pred_video": ["a pickup truck carrying a large object down the road", null], "captions_pred_audio": ["a jet engine roars and accelerates ", "a race car accelerates and revs its engine "], "question": "which entity is moving faster", "label": 1}, {"captions": ["a motor vehicle roars, drowning out people speaking in the background", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["s4Uz1Ffgo04", "vfYTJq7nU"], "start_seconds": ["100", "130"], "properties": ["roars, background, people speaking", "rustling, ducks, quack"], "captions_pred_video": ["footage of an ambulance arriving at the scene of an accident", null], "captions_pred_audio": ["a man is speaking while a boat is moving and wind is blowing ", "a duck quacks and a woman speaks"], "question": "which entity is more quiet", "label": 1}, {"captions": ["leaves rustling followed by a small bell chiming as birds chirp in the background", "a vehicle engine runs and wind blows before women yell"], "sample_ids": ["ziUT9IFTkjg", "w5W5Kqtc8E"], "start_seconds": ["10", "100"], "properties": ["background, birds, rustling", "wind, blow, vehicle"], "captions_pred_video": [null, null], "captions_pred_audio": ["birds are chirping and a chime is ringing ", "a motorboat is moving and people are shouting and 
cheering "], "question": "which entity is about a vehicle engine running and wind blowing?", "label": 1}, {"captions": ["insects humming with a dog barking and small goat bleating", "wind blows as people chatter quietly"], "sample_ids": ["tIY7qOV3rEM", "xBxDz0CFVn0"], "start_seconds": ["0", "30"], "properties": ["animal, bark, dog, barking, small, goat, bleating", "wind, chatter, people"], "captions_pred_video": ["a dog is standing in the middle of a dirt road in the woods", "footage is blurry and out of focus"], "captions_pred_audio": ["a dog is barking and a cat is meowing", "a man is speaking with wind noise in the background "], "question": "which entity is quieter", "label": 1}, {"captions": ["a toilet flushes and water drains", "a person snores loudly multiple times at a close distance"], "sample_ids": ["sfAvvZwdLCY", "sSMl2vc3ek"], "start_seconds": ["20", "20"], "properties": ["water drains, flushes, water", "loud, multiple, distance"], "captions_pred_video": ["footage of the toilet in the bathroom", null], "captions_pred_audio": ["a toilet is flushed", "a person snoring loudly"], "question": "which entity is louder", "label": 1}, {"captions": ["people clap and speak in the distance", "music plays, a person speaks, followed by whooshes and a ding"], "sample_ids": ["wwyfGO2J4", "tQWGZLItBXk"], "start_seconds": ["90", "170"], "properties": ["clap, distance, speak", "music, person, ding"], "captions_pred_video": [null, "worms revolution screenshots"], "captions_pred_audio": ["people are clapping and speaking with background noise ", "a child speaks music plays video game sounds sound effects and sound effects play "], "question": "which entity has a person speaking?", "label": 1}, {"captions": ["a woman talks while something is fried and objects are tapped", "rustling occurs, ducks quack and water splashes, followed by an adult female and adult male speaking and duck calls being blown"], "sample_ids": ["yajyRTUQk3U", "vfYTJq7nU"], "start_seconds": ["400", 
"130"], "properties": ["a woman, something, fried", "rustling, ducks, quack"], "captions_pred_video": ["- a woman cooking in the kitchen", null], "captions_pred_audio": ["a woman is speaking while food is frying in the background", "a duck quacks and a woman speaks"], "question": "which entity is about a woman talking?", "label": 0}, {"captions": ["several ducks quack and cocks crow far away", "various birds chirp and squeal, and an animal grunts"], "sample_ids": ["sNB8zxXneIM", "tDlysoZiA1I"], "start_seconds": ["20", "0"], "properties": ["several, quack, cocks", "animal, grunts, chirps"], "captions_pred_video": ["a group of geese in a cage", "'s main subject is a dog standing on top of a laptop"], "captions_pred_audio": ["a rooster is crowing and wind is blowing ", "birds are chirping and a rooster is crowing "], "question": "which entity is more animal-like", "label": 1}, {"captions": ["leaves rustle while man speaks", "a man speaks and a rooster crows while men talk in the background"], "sample_ids": ["zOZleIRqZm4", "wz7N8YRy74I"], "start_seconds": ["80", "30"], "properties": ["leaves, rustle, speak", "rooster, crow, background, men"], "captions_pred_video": ["a person picking berries from the bushes in the garden", "footage of the sun shining through the clouds on a cloudy day"], "captions_pred_audio": ["a man is speaking with crickets chirping in the background", "a man is speaking birds are chirping and a rooster is crowing "], "question": "which entity is more likely to be in a farm setting", "label": 1}, {"captions": ["several insects fly while two men talk", "a male is speaking and a duck quacks as others laugh"], "sample_ids": ["s-T9OVOiMLo", "tiDFTC-5vU"], "start_seconds": ["330", "30"], "properties": ["several, fly, men", "male, duck, laugh"], "captions_pred_video": ["a man climbing up a tree using a rope to reach the top of the tree", null], "captions_pred_audio": ["a man is speaking while insects are buzzing in the background ", "a man is speaking and 
ducks are quacking"], "question": "which entity has more people", "label": 1}, {"captions": ["various birds chirp and squeal, and an animal grunts", "someone whistles a tune"], "sample_ids": ["tDlysoZiA1I", "sIXTftIuUgw"], "start_seconds": ["0", "90"], "properties": ["animal, grunts, chirps", "someone, tune, whistle"], "captions_pred_video": ["'s main subject is a dog standing on top of a laptop", null], "captions_pred_audio": ["birds are chirping and a rooster is crowing ", "a person whistling a song"], "question": "which entity is a human", "label": 1}]
\ No newline at end of file
diff --git a/lavis/configs/datasets/discriminatory_reasoning/discriminatory_dataset/objaverse_discrn.json b/lavis/configs/datasets/discriminatory_reasoning/discriminatory_dataset/objaverse_discrn.json
new file mode 100644
index 000000000..66dcb5bfe
--- /dev/null
+++ b/lavis/configs/datasets/discriminatory_reasoning/discriminatory_dataset/objaverse_discrn.json
@@ -0,0 +1 @@
+[{"captions": [" a small house with a tree, pool, and pond in a green and blue landscape.", "a white of a city with buildings and a gold spoon."], "sample_ids": ["e22489ba182f45cb81f0a83f22abe9bd", "2351471a2d2145c59fec5f68ffae4816"], "properties": ["house, tree, pool", "image, city, spoon"], "captions_pred_pc": ["in 15 words or less a black and white image of a square with dots around it", "a black and white image of a diamond shaped piece of fabric"], "captions_pred_image": ["a 3d model of a house and a tree in a box royalty free 3d model preview no.3", "a 3d model of a city skyline in white"], "question": "which image shows a city with buildings and a gold spoon?", "label": 1}, {"captions": [" a small house with stairs and a roof.", " a house with a green, wooden-structured roof."], "sample_ids": ["e9305c80010f4e3b9de9789f01a9bee5", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["roof, stairs, house", "roof, color, green"], "captions_pred_pc": ["above a black and white image of a square with dots all over it", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a 3d rendering of a podium on a wooden floor", "a 3d model of a house with a triangular roof"], "question": "which roof is green", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["base material is wood, color is red, message is welcome to northwich", "house, fence, playground"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a room with a lot of 
wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" a green electrical utility box with a warning sign and a small blue box on a concrete base.", " a city with buildings, houses, trees, and grass."], "sample_ids": ["8e39d4766ed4444ea527d6c5ea33a5ef", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["color, base, warning", "buildings, houses, grass"], "captions_pred_pc": ["a black and white illustration of a box with dots on it", "in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of an electrical box royalty-free 3d model preview no.2", "an image of a pile of trash on the ground"], "question": "which entity has more grass", "label": 1}, {"captions": ["a collection featuring a furnished room, destroyed building, us map, house with roof, flying plane, and a ring with paper.", " a small wooden house with a green roof."], "sample_ids": ["f13d2d1d78cd49e78f3430abbb251edd", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["collection, room, destroyed, house, roof, plane, ring, paper", "roof, color, green"], "captions_pred_pc": ["a black and white image of a person's face", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a black and white photograph of a person sitting on a couch", "a 3d model of a house with a ladder"], "question": "which roof is green", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["roof truss, insulation, suspended ceiling", "box, handle, gun"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d 
rendering of a metal box with a handle"], "question": "which object has a handle", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a house with a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "c8936ace72954650b4e2d84246964849"], "properties": ["yellow, table, roof", "roof, color, pink"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white drawing of a toilet"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a house with a roof"], "question": "which roof is the color of the house", "label": 1}, {"captions": [" three white cubes, one featuring a black door.", " a long row of steel shelves in a warehouse, featuring a suspended scaffolding system."], "sample_ids": ["be791c81b1964def8e1f3b4e9a802e24", "578fe7a7bd754b889be33aea99cf5050"], "properties": ["color, shape, number", "a, material, steel"], "captions_pred_pc": ["a black and white illustration of a cross made up of dots", "above a black and white image of a rack with multiple shelves"], "captions_pred_image": ["a 3d rendering of three white cubes in a row", "a 3d model of a large metal structure"], "question": "which object is made of steel", "label": 1}, {"captions": ["a red circle, red and white arrow, mouse, and child's handwriting.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["3c233f87bf264968a7f0660b9eac9e4a", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["red, mouse, handwriting", "house, roof, blue"], "captions_pred_pc": ["in 15 words or less a black and white image of a person's hand holding a pencil and drawing on a piece of paper", "a black and white illustration of a coffee mug on a white background royalty free illustration"], 
"captions_pred_image": ["a black and white drawing of a person's hand holding a pencil", "a 3d model of a small house and barn"], "question": "which house has a blue roof", "label": 1}, {"captions": [" a small white building with stairs and shelves.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["9e1f64d4fd514059be934077717536dc", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["building, stairs, shelves", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a white 3d model of a building with stairs", "a 3d model of a woman's chest"], "question": "which entity has a pattern", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", "a white of a man in a suit and mask, possibly a diving suit."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "205251e4277e41d1aae6b2358267ad56"], "properties": ["color, material, structure", "image, color, white"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white image of a beetle on a white background"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d printed figurine of an alien creature"], "question": "which entity is a white image?", "label": 1}, {"captions": [" of a red steel playground structure with yellow rails, featuring a bench and storage rack.", " a small house with a roof and door, resembling a shack or shed."], "sample_ids": ["91e069e84f754aceb99e28541cf7ae39", "f1b557775310478893242180defa4d80"], "properties": ["color, bench, rack", "shack, roof, door"], "captions_pred_pc": ["of a 3d illustration of a black and white square frame", "a black and white illustration of a telephone on a white 
background"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d model of a small house in the middle of a field"], "question": "which entity is a shack?", "label": 1}, {"captions": [" of a pile of metal, torn and shredded paper, rocks, and a decayed animal.", " a house with a wooden-framed roof structure."], "sample_ids": ["c117f1923cad4ecf9df61b6e3d633374", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["a pile of metal, torn and shredded paper, rocks, and a decayed animal", "roof, material, wood"], "captions_pred_pc": ["a black and white map of germany on a white background", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a crumpled piece of paper on a white background", "a 3d model of a building with a roof"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a featuring a fireplace, wooden bench, sled, log, castle, and cat.", " a small wooden house with a green roof."], "sample_ids": ["fe4cd6c3972940a5b7854ca1ebc8a5a3", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["fireplace, bench, log", "roof, color, green"], "captions_pred_pc": ["a black and white illustration of a snowflake", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a fireplace with snow on the ground", "a 3d model of a house with a ladder"], "question": "which entity has a green roof", "label": 1}, {"captions": ["a white teddy bear, cloud, skull, octopus, and bird in various positions on a gray background.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["dd5849aced0443b1b4b38d413f7e06c4", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["background, color, white", "door, lock, handle"], "captions_pred_pc": ["a black and white image of a cat's head", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of an animal skull in white on a gray 
background", "a black and white image of a door with a crack in it"], "question": "which door has a lock and handle", "label": 1}, {"captions": ["a 3d white box with black trim, stripes, and handles.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["55b26130f1514032be078e13fd982905", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["color, shape, material", "door, lock, handle"], "captions_pred_pc": ["a black and white drawing of a square made up of dots", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of a white box with a black handle", "a black and white image of a door with a crack in it"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a small green chair with a slanted back and white base.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["7f93c12cbbc74e579d5f0430cfa0010f", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["color, white, base, slanted", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["above a black and white drawing of a chair", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a chair", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a white and wooden chest of drawers cabinet.", " a white plastic container with a lid, a small box, a cup, a bottle, and a jar."], "sample_ids": ["d5722274fb094222aca90bb59f4dff09", "20a02705a66f460492e07345e84a62ed"], "properties": ["chest of drawers, cabinet, white", "a box, a cup, a bottle, a jar"], "captions_pred_pc": ["a cross on a white background vector illustration of a cross on a white background royalty free stock illustrations", "a black and white pattern of dots on a white background"], "captions_pred_image": ["a 3d rendering of a white 
cabinet with a drawer", "a 3d model of a plastic bottle, a plastic cap, and a plastic container"], "question": "which entity has a box", "label": 1}, {"captions": [" a white table with grey legs, a white top, writing on it, and three legs.", " tall grass, plants, rocks, and a tree."], "sample_ids": ["68e0d097351843a3980421f2ae624c59", "eefed882ed5f4711bc5a76332d9712f3"], "properties": ["white, top, writing", "grass, plants, rocks"], "captions_pred_pc": ["a group of black dots on a white background stock illustration a group of black dots on a white background royalty free illustration", "a black and white drawing of a tree"], "captions_pred_image": ["a table with writing on it and a mouse on top of it", "a 3d model of a group of trees"], "question": "which entity has more grass", "label": 1}, {"captions": ["two white 3d egg-shaped spheres.", "a 3d white cube featuring various text and logos, including \"gypsy stribes,\" \"guinea pig studios,\" \"guillaume pi sodds,\" \"happy studios,\" and \"guillem pie sos.\""], "sample_ids": ["922b10ec49fa4b2eb43e257ff5b2b1ef", "5d08c34bfb2c4c9b9538e24d68761331"], "properties": ["shape is egg-shaped, color is white, number is two", "- material is plastic- color is white- shape is cube"], "captions_pred_pc": ["a black and white image of a pair of earrings", "of a black and white photo of a person sitting on a bench"], "captions_pred_image": ["a 3d rendering of a white object on a grey background", "a 3d image of a cube with the word 'stories' written on it"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " a table with pillar-like yellow and white poles, featuring a wooden structure, large tent, glass roof, and a chandelier with numerous glass tubes."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "fa06167d83e54b05bdfbeeae2ca7c8a6"], "properties": ["a pair of binoculars, a coffee mug, a box 
with two handles, a trash can, and a toilet", "table, structure, roof"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "a black and white image of a map with dots"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d", "a 3d model of a building with many pillars"], "question": "which entity has a wooden structure", "label": 1}, {"captions": [" a marble environment featuring a table with two oranges and a small toy.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["632b18133e924299976efaed4a52c74d", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["Object, Table, Oranges", "a, material, clay"], "captions_pred_pc": ["a black and white illustration of a sphere with dots on it", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a marble bathroom with a shower and a toilet", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" of a barrel and a cube together.", "s of a laptop, printer, building, and small bathroom, along with a blue-lit box, white and blue truck, and hp color inkjet cartridge."], "sample_ids": ["f00b7661daf544b68cddf85d7d0308c7", "747110c073314ee39ef2f4a8d63222da"], "properties": ["a, barrel, cube", "s, laptop, printer"], "captions_pred_pc": ["a black and white illustration of a 3d cube and a 3d sphere", "a black and white image of a pair of 
scissors"], "captions_pred_image": ["a 3d model of a barrel and a box next to each other royalty free 3d model preview no.3", "a black and white image of an electronic device"], "question": "which object is not a barrel?", "label": 0}, {"captions": [" a small building with stairs and a glass floor, featuring a square table and a black square ceiling light.", "a featuring a chair, a building, a glass tower, and a throne made of money, with various unique elements such as a green roof and a computer chip."], "sample_ids": ["8aaad713b8834739b008ccf2f3d86cce", "18d2e75f23474d7489a6d7d605dfc76d"], "properties": ["floor, table, light", "throne, chair, building"], "captions_pred_pc": ["above a black and white photograph of a window", "a black and white illustration of a person sitting on a bench"], "captions_pred_image": ["a black and white 3d model of a staircase on a platform", "a 3d model of a building on top of a table"], "question": "which entity has a throne made of money", "label": 1}, {"captions": [" a small white archway structure resembling a building.", " a house with a green, wooden-structured roof."], "sample_ids": ["5ad02458cf394134a902e25001d2ffef", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["structure, building, archway", "roof, color, green"], "captions_pred_pc": ["for a black and white illustration of a castle on a hill", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a 3d rendering of a white object on a white surface", "a 3d model of a house with a triangular roof"], "question": "which structure is made of wood", "label": 1}, {"captions": ["a small white wooden castle with flags, turrets, and two towers.", " a small wooden house with a green roof."], "sample_ids": ["345b1c7d2d9b4524aa0dcdc3cd27e4da", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["turrets, flags, towers", "roof, color, green"], "captions_pred_pc": ["a black and white illustration of a square made up of many 
small dots on a white background", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a castle with towers and a drawbridge royalty-free 3d model preview no.1", "a 3d model of a house with a ladder"], "question": "which entity has a green roof", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", " a house featuring a pitched roof structure with brick detailing."], "sample_ids": ["7907916e8f524df5b16eb566497db83e", "5fbd274f897b44fcafa02ee84228debf"], "properties": ["color, roof, tray", "structure, roof, pitch"], "captions_pred_pc": ["a black and white image of a metal object", "a black and white illustration of a square with a lot of dots on it"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of the roof of a house"], "question": "which roof is pitched", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["a knife, blade, handle", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "a black and white image of a cone shaped object"], "captions_pred_image": ["a piece of white plastic on a gray background", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a house featuring a roof, floor plan, small bathroom, pool, and ceiling light fixtures.", " a small house with a roof and ceiling-mounted air conditioner."], "sample_ids": ["6c6549e2975a48a1b59ebe2a6562900e", "6965067ea9e34357a7af21a2f078fbac"], "properties": ["floor plan, bathroom, pool", "roof, air conditioner, house"], "captions_pred_pc": ["the floor plan of the house the floor 
plan of the house on a white background royalty free illustration", "a black and white illustration of a window"], "captions_pred_image": ["a 3d model of a small house", "a 3d rendering of a small house with a covered porch"], "question": "which house has a roof", "label": 1}, {"captions": [" a snowy small village with farm buildings and a fence.", " of a bearded man wearing a green shirt and a hat."], "sample_ids": ["6bb669534ccc434f9ab4d7b39bae3510", "1e4e5e8133ae48c797facaec724c13a5"], "properties": ["building, fence, snowy", "hat, shirt, bearded"], "captions_pred_pc": ["a black and white drawing of a boat on the water", "of a black and white bracelet on a white background"], "captions_pred_image": ["a 3d model of a small village in the snow royalty free 3d model preview no. 3", "a 3d model of a man with a beard"], "question": "which entity is a man?", "label": 1}, {"captions": ["a small 3d purple teapot and elephant.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["4a27592dc8164f709b44446ee11832c0", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["color, shape, material", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white 3d illustration of an elephant's head on a white background 3d illustration of an elephant's head on a white background royalty free illustration", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a white ceramic teapot on a gray background", "a white 3d model of a city skyline"], "question": "which entity is a shelf?", "label": 1}, {"captions": [" a car dealership interior featuring a showroom, repair shop, and various elements like a booth, bed, and ceiling light.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["3e22efacf9ee40a1a6b2e4b72a7314d2", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["a, booth, bed", "house, tree, hill"], "captions_pred_pc": ["a black and white drawing of a tv 
screen with dots all over it royalty free illustration", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d rendering of a room with a black and white color scheme", "a 3d model of a house in the middle of a field"], "question": "which entity has a tree in front", "label": 1}, {"captions": ["white korean word on a gray background.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["b0031bafaeff45e4bbb1c01721cf5b9e", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["color, background, font", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white photo of a group of pipes in the shape of the letter 'o'", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a person holding a pair of chopsticks in front of a gray background", "a 3d model of a woman's chest"], "question": "which entity has a teddy bear?", "label": 1}, {"captions": [" a child's room in a small house with windows.", " a small white house with a roof."], "sample_ids": ["88847a6445044bcbab9611e6028a19b9", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["room, house, windows", "roof, color, white"], "captions_pred_pc": ["for a black and white drawing of a snowflake", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a child's room with toys and furniture in it royalty free 3d model preview no.1", "a 3d model of a building with a white roof"], "question": "which house has a roof that is white?", "label": 1}, {"captions": ["a 3d baseball card in a clear plastic case.", " a small house with a roof and door, resembling a shack or shed."], "sample_ids": ["04438ab47bd9430c97b5aac8f9907e4c", "f1b557775310478893242180defa4d80"], "properties": ["a, color, case", "shack, roof, door"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "a black and white illustration of 
a telephone on a white background"], "captions_pred_image": ["a black and white photograph of an airplane poster on a wall", "a 3d model of a small house in the middle of a field"], "question": "which entity is not a shack?", "label": 1}, {"captions": ["a red circle, red and white arrow, mouse, and child's handwriting.", "3d white playground set with slides and swings, featuring a plane, building, and dragon in the sky."], "sample_ids": ["3c233f87bf264968a7f0660b9eac9e4a", "e694d53545d449319a64cceb0280c3c6"], "properties": ["red, mouse, handwriting", "3d, slide, swing"], "captions_pred_pc": ["in 15 words or less a black and white image of a person's hand holding a pencil and drawing on a piece of paper", "for a 3d model of the letter 'j'"], "captions_pred_image": ["a black and white drawing of a person's hand holding a pencil", "a 3d model of a playground slide"], "question": "which entity has a plane in the sky?", "label": 1}, {"captions": [" a brick building with a roof structure and roof truss.", " a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond."], "sample_ids": ["84e8acad28664a738df69d719df9e263", "a452d5381dad4dc09f5ebe10635ae5fe"], "properties": ["roof, structure, truss", "house, roof, green"], "captions_pred_pc": ["a black and white polka dots pattern on a white background polka dots pattern on a white background illustration", "above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image 
of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white"], "captions_pred_image": ["a 3d model of a brick building with a roof", "a 3d model of a building with a black roof"], "question": "which entity has a green roof", "label": 1}, {"captions": [" of a tree stump and rock with flowers on them.", "a featuring a lamp, harp, white bowl, and white curved wall."], "sample_ids": ["3f74af45aeeb43ee95e2c8a5e3afeae6", "55bcec23e1b34f0d9d748b4dcc3ea123"], "properties": ["flower, rock, tree stump", "lamp, harp, bowl"], "captions_pred_pc": ["above a black and white drawing of a flower on a white background", "a black and white illustration of a curved line"], "captions_pred_image": ["a 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree st", "a 3d model of a harp in a white room"], "question": "which entity has a 
white bowl?", "label": 1}, {"captions": [" a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light.", "s of a cat, fish, person with a hat, pig with a green hat and swimsuit, and a green and pink bird."], "sample_ids": ["5ea0962b100b4fccb761ed84afe027b5", "402601779d1d4146b4cde106dfff1b27"], "properties": ["house, table, chair", "s, cat, fish, person, pig, bird"], "captions_pred_pc": ["above a black and white photograph of an open door", "above a black and white photo of a toy octopus on a white background"], "captions_pred_image": ["a 3d rendering of a small white table with a chair", "a snowflake in the air on a cloudy day"], "question": "which entity has a cat", "label": 1}, {"captions": [" a building surrounded by various structures, including a skyscraper, in a city setting.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["3b780ced7a814f86b3ee67f3596dddce", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["building, skyscraper, city", "door, lock, handle"], "captions_pred_pc": ["a black and white drawing of an airplane", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of a knife with a building on top of it", "a black and white image of a door with a crack in it"], "question": "which door has a lock", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["- color is red, blue, pink", "roof, trusses, beams"], "captions_pred_pc": ["for a black and white image of an object on a white background", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a white chandelier with three light bulbs hanging 
from the ceiling", "a 3d model of a roof structure"], "question": "which entity has a roof structure", "label": 1}, {"captions": ["a computer mouse on a piece of paper with a hole in it.", " a large metal building with a roof and truss structure."], "sample_ids": ["453edfa600a0493f9379738ea8393273", "b85a99699ccd4bcba213322113bb253d"], "properties": ["paper, hole, mouse", "roof, truss, structure"], "captions_pred_pc": ["above a black and white illustration of an object in the shape of a cloud", "of a metal grate on a white background"], "captions_pred_image": ["a mouse on top of a piece of paper", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": ["a 3d wooden pole with a metal spear handle.", " a gray object featuring an axe, guitar headstock, and head."], "sample_ids": ["1d5cf234576e41f0ba209c5e19d2db47", "ac5c86f38c8e4570a7eefff0958185cf"], "properties": ["- material is wood, metal, metal", "Headstock, Guitar, Head"], "captions_pred_pc": ["of a black and white illustration of a vase on a pedestal", "a black and white image of a toothbrush on a white background"], "captions_pred_image": ["a black and white image of an old-fashioned pitchfork", "a 3d model of an axe head"], "question": "which object has a headstock", "label": 1}, {"captions": [" of a destroyed building with a watercolor painting of a dilapidated house.", " a small house with a tree, pool, and pond in a green and blue landscape."], "sample_ids": ["5a33f024faf145ac80cdadcdfef8a797", "e22489ba182f45cb81f0a83f22abe9bd"], "properties": ["image, building, painting", "house, tree, pool"], "captions_pred_pc": ["above a black and white drawing of a building", "in 15 words or less a black and white image of a square with dots around it"], "captions_pred_image": ["a black and white photograph of a damaged house", "a 3d model of a house and a tree in a box royalty free 3d model preview no.3"], "question": "which image shows a house with a 
pool?", "label": 1}, {"captions": [" a white wall in an empty room.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["3f6239bdb440449aba5a8eab6134dda1", "b896a0898efe4059a776193c02132129"], "properties": ["room, wall, color", "- material is stone, metal, concrete"], "captions_pred_pc": ["of a black and white photo of a fishing rod", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d rendering of a white wall in a room", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a white and silver human torso sculpture with rocks.", "\"multiple white cubes arranged in a row on a gray background.\""], "sample_ids": ["3978258c3f26401681c6e44b404e2cca", "17c8222d4ce04e518117078e7de6aaed"], "properties": ["color, material, texture", "color, background, white"], "captions_pred_pc": ["above a black and white drawing of a skull", "a black and white image of a box with the words 'box 2' and 'thoughtful'"], "captions_pred_image": ["a 3d sculpture of a rock formation on a white background", "an image of a white background with a few small cubes on it"], "question": "which object is white", "label": 0}, {"captions": [" a leather recliner chair and ottoman set, featuring swivel functionality and available in modern orange, tan, and brown colors.", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["44be138ae8e2409bbbca44a96fc67d45", "09f2cf267e954c958828325067bcc36a"], "properties": ["color, tan, brown, orange", "island, terrain, rocks"], "captions_pred_pc": ["above a black and white illustration of an office chair", "above a black and white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a grey leather lounge chair with ottoman and footstool", "a black and white image of a piece of dirt on the ground"], "question": "which entity has more 
rocks", "label": 1}, {"captions": ["a featuring white and red cubes, and a pink and white chair.", " of a white plastic tube with a hole and a chip on it."], "sample_ids": ["f2c44a82ba744ba8b93e9a1c2272c117", "9968e06a62e8487ea33460e640abc573"], "properties": ["color, white, red, pink", "color is white, material is plastic, shape is tube"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white image of a broom on a stand"], "captions_pred_image": ["a 3d model of a white structure with stairs", "a white object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a snow-covered mountain with blue and white stripes.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["a95e4948175142f39e7d157f801c60c3", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["color, shape, texture", "roof, color, yellow"], "captions_pred_pc": ["above a black and white image of a spiral in the sky", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a mountain range in the desert royalty-free 3d model preview no. 
1", "a 3d model of a house with a roof"], "question": "which entity has a roof that is the color of yellow?", "label": 1}, {"captions": [" of a house with a roof.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["195ce38d57164eb588d19f8bd337f36e", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["roof, house, roof", "roof, color, yellow"], "captions_pred_pc": ["a black and white drawing of a toilet on a white background", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small house royalty free 3d model preview no 2", "a 3d model of a house with a roof"], "question": "which house has a roof", "label": 1}, {"captions": [" a small house with a blue roof.", " a small white barn with a metal roof."], "sample_ids": ["fa21afd3a99d448cb23fa527a784769c", "4ca3342a96824684845f7d0e062ab176"], "properties": ["roof, color, blue", "roof, metal, white"], "captions_pred_pc": ["a house made of dots on a white background a house made of dots on a white background royalty free illustration", "in 15 words or less a black and white illustration of a house made of dots"], "captions_pred_image": ["a 3d model of a house with a porch and balcony royalty-free 3d model preview no.2", "a 3d model of a barn"], "question": "which roof is made of metal", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a small white building featuring a green drawer, white curved wall, kitchen sink, and windows, resembling a floor plan of a gym."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "b494049bb15440949e465d54a72b2f02"], "properties": ["color, white, black, 
white", "building, floorplan, gym"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "above a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of a white building with two floors"], "question": "which entity is a floor plan of a gym?", "label": 1}, {"captions": [" of a white sculpture, resembling a horse and paper plane, on a gray background.", " a wooden object, including a board, piece of wood, box, and shelf."], "sample_ids": ["179b4438edfc4a43a27a83784f38ff4b", "c986212445a1466ca7be7b5ac6bea729"], "properties": ["color, background, white", "wood, board, shelf"], "captions_pred_pc": ["above a black and white image of a sculpture in the shape of a bird", "a black and white drawing of snowflakes on a white background"], "captions_pred_image": ["a 3d printed sculpture of a horse's head on a gray background", "a 3d rendering of a piece of marble"], "question": "which object is made of wood", "label": 1}, {"captions": [" of a tv stand/shelf", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["83e82604e19342c3bee1ef59426d8e2b", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["size, material, color", "a, material, clay"], "captions_pred_pc": ["a black and white illustration of a person sitting on a bench", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d rendering of a flat screen television on a shelf", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": ["a 3d blue star featuring various text, including \"prchen,\" \"bible chen,\" \"rib chicken,\" and \"birch chen.\"", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["0b712fc68ad44ce8a33592d2b26aac18", "6b745457e06840119058883b35f78f58"], "properties": ["color, shape, text", "roof, color, blue"], "captions_pred_pc": ["a 
black and white image of a pair of sunglasses", "a black and white image of a building with dots"], "captions_pred_image": ["a white paper airplane flying over a gray background", "a 3d model of a house with a steeple on top"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["roof truss, insulation, suspended ceiling", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a woman's chest"], "question": "which entity has a teddy bear?", "label": 1}, {"captions": ["a 3d white box with an open door and lid.", "a 3d white cube featuring a hole, wheels, and a diamond."], "sample_ids": ["4e95f0eca97f48d6af1888a8bacec9f6", "e44009d33258425e8efedfbc6823bf70"], "properties": ["- color is white - shape is box - material is plastic", "- color is white- shape is cube- material is plastic"], "captions_pred_pc": ["a black and white square with dots all over it", "for a black and white image of a toothbrush in the shape of a toothbrush"], "captions_pred_image": ["a 3d rendering of a white box with an open lid", "a 3d model of a white cube"], "question": "which object is white", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["ceiling, light, desks", "roof, structure, greenhouse"], 
"captions_pred_pc": ["a black and white drawing of a square on a white background", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d model of a building with a roof"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" a house with roof trusses and wooden beams on a suspended ceiling.", " a small white building with stairs and a white table."], "sample_ids": ["3c2e3a3670b042069bd8290e2c357702", "e30374c614f54fdb90f35b96b071349d"], "properties": ["roof trusses, beams, suspended ceiling", "building, stairs, table"], "captions_pred_pc": ["above a black and white drawing of a building", "above a black and white drawing of a cat sitting on top of a letter 'e'"], "captions_pred_image": ["a 3d model of a house with a roof in progress royalty free 3d model preview no. 1", "a 3d model of a building with a staircase"], "question": "which entity has a table?", "label": 1}, {"captions": ["a featuring a rock with a hole, a piece of metal, a knife, an arrow, and a person near a sand pit.", " of a white plastic tube or metal bar, resembling a knife."], "sample_ids": ["b57936676e9d43abb635fa1217992287", "8fd3836862a44a8d8b4d224bfc30c2c3"], "properties": ["a, hole, rock", "a knife, blade, handle"], "captions_pred_pc": ["a black and white image of a lace belt", "a black and white image of a shelf with a white background"], "captions_pred_image": ["a 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's 
surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon'", "a piece of white plastic on a gray background"], "question": "which object is not a knife?", "label": 0}, {"captions": ["a featuring a metal refrigerator, a graffiti-covered sink, and a metal toilet, all with rusted elements.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["e65de9c4ec9242679a45e74733f7d61d", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["rusty, sink, graffiti", "roof, color, yellow"], "captions_pred_pc": ["a black and white drawing of a room with a door and a rug on the floor a black and white drawing of a room with a door and a rug on the floor royalty free illustration", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a stainless steel toilet bowl on a pedestal", "a 3d model of a house with a roof"], "question": "which roof is yellow", "label": 1}, {"captions": [" a two-story building with stairs and a three-tier display.", " a building or house with a roof and floor plan, resembling a pyramid with a flat roof."], "sample_ids": ["1d817b7b6ded439d8b92eeab87e4cf8d", "7a91292e1ed64e60a1bbbb499209a0df"], "properties": ["tiers, building, stairs", "apse, roof, floor plan"], "captions_pred_pc": ["for a black and white photo of a person sitting on a bench", "a black and white drawing of a room"], "captions_pred_image": ["a 3d model of a building with two floors and a spiral staircase", "a 3d model of a building in the shape of a pyramid"], "question": "which building has a 
flat roof", "label": 1}, {"captions": [" of a pile of metal, torn and shredded paper, rocks, and a decayed animal.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["c117f1923cad4ecf9df61b6e3d633374", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["a pile of metal, torn and shredded paper, rocks, and a decayed animal", "color is white, yellow, plastic"], "captions_pred_pc": ["a black and white map of germany on a white background", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a 3d model of a crumpled piece of paper on a white background", "a white plastic container with a label on it"], "question": "which entity is made of plastic", "label": 1}, {"captions": ["a 3d white arrow, letter s, toothpick, skateboard, and knife forming a logo.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["2ee9dcd863514073a849ece8ea7714dd", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["3D, toothpick, skateboard", "island, mountain, grass"], "captions_pred_pc": ["above a black and white image of a person's hand holding a pencil", "a black and white map of the island of malta"], "captions_pred_image": ["a white toothbrush on a gray background", "a 3d image of a small island in the middle of a lake"], "question": "which entity has grass?", "label": 1}, {"captions": ["a featuring white and red cubes, and a pink and white chair.", "a featuring a room with a staircase, a damaged bus and rv, two houses, a large white box, and destroyed buildings with debris."], "sample_ids": ["f2c44a82ba744ba8b93e9a1c2272c117", "a47dcf3d3cf34c58af17b6715d6f1232"], "properties": ["color, white, red, pink", "room, staircase, bus"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a white structure with stairs", "a 3d image of a building with a 
lot of debris"], "question": "which entity has more staircases", "label": 1}, {"captions": [" of a black and white coffee table with a laptop, featuring a performance studio and ceiling grid structure.", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken window."], "sample_ids": ["a177693cc8c7428292680816001b48c6", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["black, white, laptop", "room, furniture, window"], "captions_pred_pc": ["a black and white drawing of a room with dots on the floor", "above a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a dishwasher with a dish inside it", "a 3d image of a room with a person in it"], "question": "which room has furniture", "label": 1}, {"captions": [" a modern wooden headboard with a metal frame and a white wall with a black strip.", " a pink-framed building structure with beams and trusses."], "sample_ids": ["bcaadd18d8b94e91a5f9d8ff39dc9b79", "18e392c5360146eda498c5edab25b15c"], "properties": ["headboard, wall, strip", "frame, beams, trusses"], "captions_pred_pc": ["of a black and white image of a long black lace scarf", "a black and white drawing of a metal grate"], "captions_pred_image": ["a 3d rendering of a headboard in white and black", "a 3d model of a building under construction"], "question": "which entity has a frame", "label": 1}, {"captions": [" a building featuring yellow columns, a yellow roof, and a wooden structure.", " a large white and metal building with a metal roof structure."], "sample_ids": ["0ce6a4102f4f40e2a0084938b0a93941", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["structure, columns, roof", "roof, metal, white"], "captions_pred_pc": ["a black and white drawing of a window", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d model of a large white box"], "question": "which building has a metal roof", "label": 1}, {"captions": 
[" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a black and white cube-shaped building with a staircase."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "587e65f2d904440488a98dfa9a4e9dbe"], "properties": ["resembles, octopus, squid, spider, robot", "shape is cube, color is black, white"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "above a black and white photograph of a sculpture"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a black and white 3d model of a building"], "question": "which entity is a cube?", "label": 1}, {"captions": [" a small house with a red roof.", " a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond."], "sample_ids": ["085db9059b744673b5623b5338e02196", "a452d5381dad4dc09f5ebe10635ae5fe"], "properties": ["roof, red, house", "house, roof, green"], "captions_pred_pc": ["a black and white dotted square on a white background", "above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a 
black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white"], "captions_pred_image": ["a 3d model of a small shed in the snow", "a 3d model of a building with a black roof"], "question": "which house has a green roof", "label": 1}, {"captions": [" a robot with a red, bloody head and red eyes, accompanied by a small toy rocket and a white and red spaceship.", " a four-legged metal workbench with shelves."], "sample_ids": ["dd61409988b7464ab025cc1c15f12f43", "e93b633d477942d9b79ef8ab566473d6"], "properties": ["red, eyes, rocket", "Four legs, Metal, Shelf"], "captions_pred_pc": ["of a black and white image of a clock", "for a black and white illustration of a cross"], "captions_pred_image": ["a 3d model of a futuristic motorcycle helmet on a white background", "a 3d model of a table with four legs"], "question": "which object has four legs", "label": 1}, {"captions": [" a green cube with a black base and a small robot sitting on top.", " a small white table with stairs and a ladder, featuring a black and white kitchen hood and a black square ceiling light."], "sample_ids": ["c52f7e1f4b194d308b5ddde6deca3955", "5565c16f297e405f9d5dbf0ebb623605"], "properties": ["color, shape, size", "table, stairs, ladder"], "captions_pred_pc": ["in one line a black and white image of a dotted square on a white background royalty free illustration", "above a black and white photograph of a small square in the center of the image"], "captions_pred_image": ["a 3d model of a white box with a black base", "a 3d model of a table with a stool on top"], "question": "which object has a black base", "label": 1}, {"captions": [" a black and white cube-shaped building with a staircase.", " a white motorcycle with wings."], "sample_ids": ["587e65f2d904440488a98dfa9a4e9dbe", "7e684a7c012c4fd0ac91844f22457640"], "properties": ["shape is cube, color is 
black, white", "color, white, wings"], "captions_pred_pc": ["above a black and white photograph of a sculpture", "a black and white image of a pair of sunglasses"], "captions_pred_image": ["a black and white 3d model of a building", "a 3d model of a motorcycle on a white background"], "question": "which object is white", "label": 1}, {"captions": [" of a metal tool with a yellow handle, a laptop, and a ceiling light fixture.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["b714bf13e9e54acb867c2c1b3ccf8ae8", "6b745457e06840119058883b35f78f58"], "properties": ["metal, laptop, light fixture", "roof, color, blue"], "captions_pred_pc": ["for a black and white image of a corner shelf", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a telescope on a stand", "a 3d model of a house with a steeple on top"], "question": "which entity has a roof that is blue", "label": 1}, {"captions": [" a tall glass tower featuring blue, green, and white squares.", "a white of a man in a suit and mask, possibly a diving suit."], "sample_ids": ["405d68eda26c401fbcddc7e3a457c74e", "205251e4277e41d1aae6b2358267ad56"], "properties": ["color, shape, height", "image, color, white"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "a black and white image of a beetle on a white background"], "captions_pred_image": ["a black and white photograph of a metal sculpture on a pedestal", "a 3d printed figurine of an alien creature"], "question": "which entity is a white image?", "label": 1}, {"captions": [" a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["b16fb21cda9a4a21a024df749c2304f4", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["roof, ceiling, hole", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white 
image of a square with dots on it", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d model of a small house and a tree in the foreground", "a 3d model of a woman's chest"], "question": "which entity has a pattern", "label": 1}, {"captions": [" a building with yellow and pink walls, a pink roof, and multicolored structural elements.", " a house with a roof, roof truss, and suspended ceiling structure."], "sample_ids": ["8b1eda5d040f4dc29255cde2db077c30", "5abf69f79b92484fb54d41ff0c0a2c11"], "properties": ["color, roof, wall", "roof, truss, suspended ceiling"], "captions_pred_pc": ["a black and white drawing of a building", "a suitcase made of dots on a white background a suitcase made of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building structure", "a 3d model of a house with roof trusses"], "question": "which entity has a roof truss", "label": 1}, {"captions": [" a yellow and white structure with yellow poles.", " a house with a wooden-framed roof structure."], "sample_ids": ["bada91e216fd486d9e3e356d48978f33", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["color, shape, poles", "roof, material, wood"], "captions_pred_pc": ["a black and white drawing of a dotted pattern on a white background a black and white drawing of a dotted pattern on a white background royalty free illustration", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d model of a building with a roof"], "question": "which structure has a roof made of wood", "label": 1}, {"captions": [" a white cylinder with a blue light and ring around it.", " of two rocks with ice elements."], "sample_ids": ["f91e2e3340604001bd6ea2ab25886e46", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["color, light, ring", "image is a rock with ice elements"], "captions_pred_pc": ["in 15 words or less a black and white image of a plastic sponge 
on a white background royalty free illustration", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a clear plastic tube", "a 3d image of two rocks on a gray surface"], "question": "which entity is a rock?", "label": 1}, {"captions": ["a 3d white object resembling a knife, sword, and airplane.", " of a white rock-like object, possibly a shell or ice."], "sample_ids": ["d88df1cb10da467bb6f77af6aeaa8f86", "096e42b466ec438d95c5d89a85191534"], "properties": ["shape is cylinder, color is white, material is plastic", "white, rock, shell"], "captions_pred_pc": ["of a spike on a white background", "in one hundred words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words"], "captions_pred_image": ["a 3d model of a paper airplane", "a 3d model of a white rock on a gray background"], "question": "which object is whiter", "label": 1}, {"captions": [" of a jacket on a mannequin with arms outstretched.", " of a meat skewer with a small piece of bread and a sausage on a stick."], "sample_ids": ["1dc7708fd7bd4ea1b035f4c48dbd7868", "1728f2cb8eca4080af02b22262ff45d5"], "properties": ["arm, jacket, mannequin", "meat, bread, sausage"], "captions_pred_pc": ["a black and white image of a woman's torso", "a black and white image of a brush on a white background"], "captions_pred_image": ["a 3d model of a woman's jacket royalty free 3d model preview no.2", "an image of a 
small white object on a gray background"], "question": "which entity has more bread", "label": 1}, {"captions": [" of a japanese-style pagoda house in a pixelated village.", " a small white barn with a metal roof."], "sample_ids": ["42c4e6ca4a0c4b7b9a97d543b2442222", "4ca3342a96824684845f7d0e062ab176"], "properties": ["image is a japanese-style pagoda house in a pixelated village", "roof, metal, white"], "captions_pred_pc": ["a black and white drawing of a square on a white background stock illustration \u00a9 2019 iStock", "in 15 words or less a black and white illustration of a house made of dots"], "captions_pred_image": ["a 3d model of a japanese temple and pagoda", "a 3d model of a barn"], "question": "which building has a white roof", "label": 1}, {"captions": [" of a gray stereo system with a blue drawer, resembling a printer and computer with a blue screen, featuring a cd player.", " of a person breaking through a brick wall."], "sample_ids": ["3d4d965e67744415b69ff6aaeb11f420", "0708be2186d24d52815e8ac9d751fc37"], "properties": ["color, screen, drawer", "image, brick, wall"], "captions_pred_pc": ["above a black and white image of a brush", "for a black and white illustration of an ice cream cone"], "captions_pred_image": ["a 3d model of a printer on a white background royalty free 3d model no.2", "a 3d model of a person breaking through a brick wall"], "question": "which image shows a wall made of bricks?", "label": 1}, {"captions": ["a 3d printed model of a human skeleton foot and leg.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["6eea1e2ab3f645f6a4670cccbc85c908", "b896a0898efe4059a776193c02132129"], "properties": ["size, material, color", "- material is stone, metal, concrete"], "captions_pred_pc": ["for a black and white illustration of an owl in the shape of a heart", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of the 
foot and ankle bones royalty free 3d model preview no.", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", " a white castle composed of small cubes."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["room, bed, desk", "composed of, white, cubes"], "captions_pred_pc": ["a black and white drawing of a door", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of small cubes", "label": 1}, {"captions": [" a white and black horned demon with bunny features.", " a black and purple dragon with wings and purple eyes."], "sample_ids": ["1d3537d1341a423f89990e9b06924904", "9fdaa7bf7dbe499482d10705cbe366d2"], "properties": ["color, horns, features", "Eye color, Black, Purple"], "captions_pred_pc": ["above a black and white image of a pair of headphones", "a black and white illustration of an airplane on a white background"], "captions_pred_image": ["a 3d model of a skull with horns on its head", "a 3d model of a dragon with wings"], "question": "which entity has purple eyes", "label": 1}, {"captions": [" of a white and wooden chest of drawers cabinet.", " a house with a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["d5722274fb094222aca90bb59f4dff09", "c8936ace72954650b4e2d84246964849"], "properties": ["chest of drawers, cabinet, white", "roof, color, pink"], "captions_pred_pc": ["a cross on a white background vector illustration of a cross on a white background royalty free stock illustrations", "a black and white drawing of a toilet"], "captions_pred_image": ["a 3d rendering of a white cabinet with a drawer", "a 3d model of a house with a roof"], "question": "which entity is a building?", "label": 1}, 
{"captions": [" a large metal building with a roof and truss structure.", " a white building, table, and various piles of paper, including a low-poly object."], "sample_ids": ["b85a99699ccd4bcba213322113bb253d", "515210fb031f4ec89021ee8ce9e432e9"], "properties": ["roof, truss, structure", "- building is white, table is white, piles of paper are white"], "captions_pred_pc": ["of a metal grate on a white background", "a black and white drawing of a piece of paper"], "captions_pred_image": ["a 3d model of a long metal fence", "a 3d model of a snowy landscape"], "question": "which building is white", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a small house with a blue roof."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "fa21afd3a99d448cb23fa527a784769c"], "properties": ["color, shape, and size", "roof, color, blue"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a house made of dots on a white background a house made of dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a house with a porch and balcony royalty-free 3d model preview no.2"], "question": "which entity is a building?", "label": 1}, {"captions": [" a small island featuring a large building, trees, and a house on a hill, surrounded by a forest.", " of a barrel and a cube together."], "sample_ids": ["d557c62e9be741a6b0f6b204d11a9c6f", "f00b7661daf544b68cddf85d7d0308c7"], "properties": ["house, hill, forest", "a, barrel, cube"], "captions_pred_pc": ["above a black and white illustration of a small island in the middle of a body of water", "a black and white illustration of a 3d cube and a 3d sphere"], "captions_pred_image": ["a black and white image of a small island in the middle of a body of water", "a 3d model of a barrel and a box next to each other royalty free 3d model preview no.3"], "question": "which 
object is not a barrel?", "label": 0}, {"captions": [" of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom.", "a white of a house with a hole in the ceiling."], "sample_ids": ["f178fb523ad7421aaa90a92ee736ee00", "2915cbd03e164ac0bb13866c2d68cc65"], "properties": ["bedroom, bathroom, bed", "image, house, ceiling"], "captions_pred_pc": ["a black and white drawing of a room with dots", "above a black and white drawing of a house"], "captions_pred_image": ["a 3d model of a small room with a bed, desk, and chair", "a 3d model of a house with a balcony"], "question": "which image shows a house with a hole in the ceiling?", "label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "bded33af34104b9686b845dfd18309a9"], "properties": ["color, light, jewels", "table, staircase, light"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background royalty free illustration", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a 3d model of a small table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a small white barn with a metal roof.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["4ca3342a96824684845f7d0e062ab176", "3cd410c4359a4cef98702963a2b9802b"], "properties": ["roof, metal, white", "roof, trusses, ladder"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a house made of dots", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a 3d model of a barn", "a 3d model of the roof of a building"], "question": 
"which roof is made of wood", "label": 1}, {"captions": ["a 3d yellow plastic object featuring a cube with a hole, a lid, and a ball.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["7ff3c8f9ab7c49fe93b93130e25fcc4a", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["color, material, shape", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white photo of a pair of earrings", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a white 3d model of a city skyline"], "question": "which entity is a still image?", "label": 1}, {"captions": [" a small building with stairs and a glass floor, featuring a square table and a black square ceiling light.", " of a white tiled floor with a ceiling light and small holes."], "sample_ids": ["8aaad713b8834739b008ccf2f3d86cce", "9906caefe141465990aacb312e1025f0"], "properties": ["floor, table, light", "light, floor, ceiling"], "captions_pred_pc": ["above a black and white photograph of a window", "a black and white polka dot pattern on a white background"], "captions_pred_image": ["a black and white 3d model of a staircase on a platform", "a 3d model of a white tile floor"], "question": "which floor is not tiled?", "label": 0}, {"captions": [" of a white chair with arms and legs.", "a featuring a metal refrigerator, a graffiti-covered sink, and a metal toilet, all with rusted elements."], "sample_ids": ["61ec56afad7a45deb99ccf1ab1bd2d73", "e65de9c4ec9242679a45e74733f7d61d"], "properties": ["Arms, Legs, Color", "rusty, sink, graffiti"], "captions_pred_pc": ["of a 3d rendering of a chair in the style of the 1920s and 1930s", "a black and white drawing of a room with a door and a rug on the floor a black and white drawing of a room with a door and a rug on the floor royalty free illustration"], "captions_pred_image": ["a 3d model of a white outdoor chair royalty free preview 
no 3", "a stainless steel toilet bowl on a pedestal"], "question": "which entity has more rusty elements", "label": 1}, {"captions": [" a white rocking chair with a curved backrest.", " a robotic warrior with a sword."], "sample_ids": ["ee0deb90abf943b6894cd5ded1331213", "1c54afa26eb24e19b8660066718a9c5a"], "properties": ["backrest, curved, yes", "weapon, sword, robot"], "captions_pred_pc": ["a black and white illustration of a spiral pattern on a white background a black and white illustration of a spiral pattern on a white background royalty free illustration", "a black and white image of a small white object on a black surface"], "captions_pred_image": ["a 3d model of a white chair royalty free 3d model no. 3", "a black and white image of a robot standing on a piece of paper"], "question": "which entity is a robot?", "label": 1}, {"captions": [" a small white house with stairs and a spiral staircase, featuring a white table and ceiling light.", " a small house on a hill in a field."], "sample_ids": ["e9e1cc7fae22458197a61f43a9c355f4", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["house, staircase, table", "house, hill, field"], "captions_pred_pc": ["above a black and white photograph of a dog in a frame", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d model of a small house with a spiral staircase", "a black and white image of a small house"], "question": "which house is on a hill?", "label": 1}, {"captions": [" a brick wall with grass.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["53f2d948091f417cb580e22469c94db2", "a17477b445b3443189dad22f768b888b"], "properties": ["brick, grass, wall", "roof, pillar, stairs"], "captions_pred_pc": ["above a black and white illustration of an underwater scene", "a black and white image of a square with dots"], "captions_pred_image": ["a black and white photo of a brick wall and a puddle", "a 3d model of a small building with a balcony"], "question": "which 
entity has a roof", "label": 1}, {"captions": [" a large house with a roof on a platform.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["cb3e09a301b746918a682a595037c7f7", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["roof, platform, house", "house, pool, balcony"], "captions_pred_pc": ["a black and white image of a piece of paper", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a modern house"], "question": "which house has a pool", "label": 1}, {"captions": [" a white hospital operating room with blue containers and medical equipment.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["a10f3196831647bc8842eacac640047d", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["color, white, containers", "door, lock, handle"], "captions_pred_pc": ["a black and white illustration of the letter 'f' made up of tiny dots", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital 
room", "a black and white image of a door with a crack in it"], "question": "which door has a lock", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " of a small wooden house with a roof."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "f5904a9d87ff4fa688146c18c1f27fec"], "properties": ["apse, roof, floor plan", "roof, house, wooden"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white drawing of a house with dots"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a small house"], "question": "which house has a roof", "label": 1}, {"captions": [" of a white tiled floor with a ceiling light and small holes.", " a small white building with stairs and a white table."], "sample_ids": ["9906caefe141465990aacb312e1025f0", "e30374c614f54fdb90f35b96b071349d"], "properties": ["light, floor, ceiling", "building, stairs, table"], "captions_pred_pc": ["a black and white polka dot pattern on a white background", "above a black and white drawing of a cat sitting on top of a letter 'e'"], "captions_pred_image": ["a 3d model of a white tile floor", "a 3d model of a building with a staircase"], "question": "which entity has a table?", "label": 1}, {"captions": [" a small green chair with a slanted back and white base.", " a white and black striped box resembling a stack of blocks or paper."], "sample_ids": ["7f93c12cbbc74e579d5f0430cfa0010f", "78246d66fd2e4e1195bc4536f4037862"], "properties": ["color, white, base, slanted", "striped, white, black"], "captions_pred_pc": ["above a black and white drawing of a chair", "a black and white illustration of a pair of shoes on a white background vector illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of 
shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a"], "captions_pred_image": ["a 3d model of a chair", "a 3d model of a stack of books"], "question": "which object is white and black?", "label": 1}, {"captions": [" a woman with long wings, legs, hair, and spikes, accompanied by a spider with long legs.", " a clay pot with holes in it."], "sample_ids": ["68cf560d0c424ec6a3c58e1b9967508d", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["hair, legs, wings", "hole, material, clay"], "captions_pred_pc": ["of a white and black bird flying in front of a white background", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a woman with a large spider-like creature on her back", "a clay sculpture of a face with holes in it"], "question": "which entity is made of clay", "label": 1}, {"captions": [" a house featuring a roof, floor plan, heating system, and ceiling light fixture.", " a small white house with a roof."], "sample_ids": ["269939560e08432f9e134309b9a1c587", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["floor 
plan, heating system, ceiling light fixture", "roof, color, white"], "captions_pred_pc": ["a black and white drawing of a house", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a building with a glass facade", "a 3d model of a building with a white roof"], "question": "which house has a roof", "label": 1}, {"captions": [" of a meat skewer with a small piece of bread and a sausage on a stick.", "a featuring a fireplace with a broom, shovel, and other items, a man and a dog, a table with a bucket, and a vase with a pipe."], "sample_ids": ["1728f2cb8eca4080af02b22262ff45d5", "fdcbb46224a44faca5c3fb20264eb2e7"], "properties": ["meat, bread, sausage", "broom, shovel, fireplace"], "captions_pred_pc": ["a black and white image of a brush on a white background", "a close-up view of a white plastic bag with a small hole in it"], "captions_pred_image": ["an image of a small white object on a gray background", "a 3d model of an outdoor fireplace"], "question": "which entity has a fireplace?", "label": 1}, {"captions": [" of a pillow featuring a hot dog shape and a bag of chips with a dragon design.", " of a stone wall with a window and a clock."], "sample_ids": ["4ae6ac813d584d12a5d5d608a595bfe5", "46fc13a04c8b47fa86215b9efc2eb1f9"], "properties": ["shape is hot dog, design is dragon, color is black", "window, clock, wall"], "captions_pred_pc": ["a black and white illustration of an eye with dots", "a black and white illustration of a bullet in the middle of a starry sky"], "captions_pred_image": ["a black and white photo of a pillow with a soda can on it", "a 3d model of a brick wall"], "question": "which object has a window?", "label": 1}, {"captions": [" a purple sphere with polka dots, flowers, and a hole, containing a pink ball inside.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["1000e3065aaa4d6fb93cea89b99e1748", "bded33af34104b9686b845dfd18309a9"], "properties": 
["color, shape, material", "table, staircase, light"], "captions_pred_pc": ["a black and white image of a dotted circle on a white background royalty free illustration", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d model of an object with a ball in the center", "a 3d model of a small table with a staircase"], "question": "which entity is a table?", "label": 1}, {"captions": [" a medieval stone castle with walls and stairs.", " a staircase with a railing, table, and chair, featuring a square ceiling light."], "sample_ids": ["10bc1bbec4c045f9b15fc2156b5e32ee", "51a0fba79bce472a8b827b78a110b77f"], "properties": ["wall, stairs, castle", "stair, table, chair"], "captions_pred_pc": ["a castle in the snow a castle in the snow illustration on a white background royalty free illustration", "for a black and white image of a toilet paper holder"], "captions_pred_image": ["a 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the", "a 3d model of a staircase in a room"], "question": "which staircase has a railing", "label": 1}, {"captions": [" a house with a pink roof, truss, and a square white ceiling lamp.", " a staircase with a railing, table, and chair, featuring a square 
ceiling light."], "sample_ids": ["91b2e9e4660946f5b4808a18b5323b69", "51a0fba79bce472a8b827b78a110b77f"], "properties": ["roof, truss, lamp", "stair, table, chair"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "for a black and white image of a toilet paper holder"], "captions_pred_image": ["a 3d model of a house with a metal roof", "a 3d model of a staircase in a room"], "question": "which entity has a table and chair?", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " a spiral staircase with railings."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "7f24a859ed3d4b7c86f940d8cc11218f"], "properties": ["a box, a cup, a bottle, a jar", "railings, staircase, spiral"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "above a black and white drawing of a spiral staircase"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d rendering of a staircase in a room"], "question": "which object has railings", "label": 1}, {"captions": ["white wall lamp with a leaf design", " a white building with a red roof."], "sample_ids": ["7afad3ca209c4a91b5457d28bdc2b916", "1f9580be397d4f948bf53fe1d5bc5756"], "properties": ["design, leaf, white", "color, white, roof, red"], "captions_pred_pc": ["above a black and white image of a circular object", "in 15 words or fewer a black and white drawing of a camera"], "captions_pred_image": ["a white wall sconce with a leaf design and a light bulb", "a 3d model of an office building"], "question": "which entity is a building?", "label": 1}, {"captions": [" of a toy gun with a green and white handle, featuring red and green lights.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["81f3fac6a5174827aa6cee5353ba6561", "b896a0898efe4059a776193c02132129"], "properties": 
["color, light, handle", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d printed model of a futuristic gun", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a white shelf with holes and brackets.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["8f17016d6c0049fc98c5bfe5e6265740", "b896a0898efe4059a776193c02132129"], "properties": ["color, white, holes", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white image of a triangle with dots on it", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a white bench with a shelf on top", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": ["a white plastic bottle with a lid and cap.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["f7e60e3a8ee84ad0954d288c3f1a7220", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["color is white, material is plastic, shape is bottle", "- material is wood, rusty, horned"], "captions_pred_pc": ["of a black circular object on a white background", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed 
bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a", "3d model of a plague doctor's mask"], "question": "which object is made of wood", "label": 1}, {"captions": [" a room featuring a table and chairs, with blue and green walls.", " a house with wooden framing and trusses."], "sample_ids": ["a49899d9a6194583b745e02f3654841e", "4501794e257c4a8ba60a94757d8e93a9"], "properties": ["color, table, chairs", "frame, trusses, wood"], "captions_pred_pc": ["a close-up of a white object on a white background", "a black and white drawing of a window"], "captions_pred_image": ["a 3d rendering of a room with a white wall", "a 3d model of a house under construction"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a large, rusty, square-shaped metal pillar resembling a rusted tower.", " a purple chair with holes in it."], "sample_ids": ["b5afccae993346079483507296fbb029", "833151c8e0f4489a9fa966635a948452"], "properties": ["shape is square, material is metal, color is rusty", "color, purple, holes"], "captions_pred_pc": ["above a black and white image of a square frame with dots", "of a silver pendant with an intricate design"], "captions_pred_image": ["a 3d model of a concrete column", "a 3d model of a white chair"], "question": "which object is made of holes", "label": 1}, {"captions": [" of a rock formation with a white cliff and a rock.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["4a25f6dfbea943bca137dacd2f7b984f", 
"06a1c233fb444830b577aa06e2c01294"], "properties": ["image is rock formation with a white cliff and a rock", "house, tree, hill"], "captions_pred_pc": ["above a black and white map of spain on a white background", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a black and white image of a rock formation on a gray background", "a black and white image of a house in the middle of a field"], "question": "which entity has a hill?", "label": 1}, {"captions": [" a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles.", " a house with a wooden-framed roof structure."], "sample_ids": ["bd7aab78974643f5a0660c699daf8eb3", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["roof, color, yellow", "roof, material, wood"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a table and chairs on a white background", "a 3d model of a building with a roof"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["f6c6e7a65a3e42dfa431b1d984c72f28", "c3a82df41875402285608ef13a55df57"], "properties": ["house, fence, dog", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["above a black and white drawing of a bathroom", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 
3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": ["a featuring a small desk and chair, table, staircase, bathroom with sink, and square ceiling light in a small house setting.", "a featuring a building, a coin, a small black box, and a ball."], "sample_ids": ["1be04fc7b47c47e9aaa9d2720af16b87", "949cf1a57aea45d18261e980b21b8c35"], "properties": ["desk, chair, table", "a, building, coin, ball"], "captions_pred_pc": ["for a black and white image of a letter 'f'", "a black and white illustration of a triangular shaped object"], "captions_pred_image": ["a 3d model of a staircase in a room", "a 3d model of a box with a coin next to it"], "question": "which entity has a coin?", "label": 1}, {"captions": ["a 3d wooden toy on a wooden plate with a piece of fruit and bread with a knife.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["1c389c8f46b345838e515b9747c1f982", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["plate, fruit, knife", "house, pool, balcony"], "captions_pred_pc": ["a black and white illustration of a hedgehog", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a person riding a skateboard on a wooden surface", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": [" of a bench and lamp post.", "a small yellow and black helicopter flying in the air."], "sample_ids": 
["ac6e6b5ee63840a484b5ad2508675de1", "7qxP6dQ5nNuaG8d0vswXXKnd0vq"], "properties": ["A, bench, lamp post", "color, yellow, black"], "captions_pred_pc": ["a black and white image of a bench and a street lamp", "a black and white illustration of an airplane"], "captions_pred_image": ["3d model of a street lamp and bench royalty free 3d model preview no.3", "a black and white helicopter flying in the air"], "question": "which object is black and yellow?", "label": 1}, {"captions": [" a city featuring various buildings, including one with a white roof and numerous white cubes, as well as a plane.", " a snowy city with buildings and a plane flying overhead."], "sample_ids": ["a3c50635c2a04e548e57d4f027899131", "cc63ceb2b5e84872a1a1f6423de419e2"], "properties": ["building, roof, white, cubes", "building, plane, city"], "captions_pred_pc": ["above a black and white image of a map", "a black and white photo of an airplane on a white background"], "captions_pred_image": ["a 3d model of a city on a white background", "a 3d model of a city in black and white"], "question": "which building has a white roof", "label": 0}, {"captions": [" a polygonal owl, stone block, and helmet, featuring triangular details and a rock with a hole.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["a78520b929314d7c8477322e000bc481", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["polygonal, helmet, triangular details", "hat, skull, bread"], "captions_pred_pc": ["a black and white image of a piece of fabric with dots", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d model of a gray, triangular shaped object", "a black and white image of a stone sculpture"], "question": "which entity has a skull with a blue hat?", "label": 1}, {"captions": ["a 3d wooden sign featuring various texts, including \"resin park,\" \"shabaab corporation,\" and \"johnson sammons.\"", " a four-legged metal workbench with shelves."], 
"sample_ids": ["34a49861e7744acfb71de471a755e917", "e93b633d477942d9b79ef8ab566473d6"], "properties": ["text, material, shape", "Four legs, Metal, Shelf"], "captions_pred_pc": ["a black and white image of a metal bar with text on it", "for a black and white illustration of a cross"], "captions_pred_image": ["a 3d rendering of a marble slab with text on it", "a 3d model of a table with four legs"], "question": "which object is made of metal", "label": 1}, {"captions": ["a 3d white object resembling a sphere, frog, egg, hat, and shell.", "\"multiple white cubes arranged in a row on a gray background.\""], "sample_ids": ["0a8e0b95d8ce43ee9159ad01d925aad8", "17c8222d4ce04e518117078e7de6aaed"], "properties": ["shape is sphere, color is white, material is plastic", "color, background, white"], "captions_pred_pc": ["a black and white illustration of a sponge in the shape of a sponge sponge black and white illustration of a sponge in the shape of a sponge royalty free illustration", "a black and white image of a box with the words 'box 2' and 'thoughtful'"], "captions_pred_image": ["a 3d sculpture of an apple on a white background", "an image of a white background with a few small cubes on it"], "question": "which object is whiter", "label": 0}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["color, red, blue, structure", "a room, a cake, a table"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d rendering of a white room with various items in it"], "question": "which 
entity has a room?", "label": 0}, {"captions": [" a house with a green roof and lawn.", " a spiral staircase with a railing in a small building."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "28cae056856c4a8ba9d1a6af5355f831"], "properties": ["roof, green, lawn", "staircase, railing, building"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "a black and white photograph of a light switch"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of a staircase in a white room"], "question": "which entity has a railing", "label": 1}, {"captions": [" a red, blue, and green striped tower building.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["8668f9e9d1a64b86b31f260b8056cd19", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, red, blue, green", "island, terrain, water"], "captions_pred_pc": ["a black and white drawing of a butterfly on a white background a black and white drawing of a butterfly on a white background royalty free illustration", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a pair of cylindrical towers with a staircase leading up to the top of one of the towers", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": ["a white of a house with a hole in the ceiling.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["2915cbd03e164ac0bb13866c2d68cc65", "c3a82df41875402285608ef13a55df57"], "properties": ["image, house, ceiling", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["above a black and white drawing of a house", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a house with a balcony", "a white plastic object on a gray background"], 
"question": "which object is white", "label": 1}, {"captions": [" of a wrecked plane, ship, and bird on a pile of rocks with grass.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["b0c703df20154bbf9fd8707c61137fc5", "a17477b445b3443189dad22f768b888b"], "properties": ["plane, ship, bird", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white watercolor map of the state of ohio", "a black and white image of a square with dots"], "captions_pred_image": ["a black and white photograph of a pile of debris on the ground", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": [" a red, blue, and green striped tower building.", "a white 3d-printed plastic container set with a lid, two small containers, and a hat-like attachment."], "sample_ids": ["8668f9e9d1a64b86b31f260b8056cd19", "1da865c75a5e4a57a17652975dae5474"], "properties": ["color, red, blue, green", "color, white, plastic"], "captions_pred_pc": ["a black and white drawing of a butterfly on a white background a black and white drawing of a butterfly on a white background royalty free illustration", "a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white 
background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a"], "captions_pred_image": ["a 3d model of a pair of cylindrical towers with a staircase leading up to the top of one of the towers", "a 3d model of a white box, a white lid, and a white container"], "question": "which object is made of plastic", "label": 1}, {"captions": [" of a broken stone wall featuring an angel sculpture.", "a featuring a bench with flowers, a bridge, a white sculpture, a bicycle with a flower, a giraffe, and a palm tree."], "sample_ids": ["aae2c42740a04fd68068f5707111d26f", "12093c89a60941e7884b252bdc05104c"], "properties": ["image is a sculpture of an angel on a wall", "giraffe, bench, flower"], "captions_pred_pc": ["a black and white image of a toilet paper roll", "a black and white drawing of a gear on a white background"], "captions_pred_image": ["a 3d model of a marble sculpture of an angel", "a 3d model of a sculpture made of sticks"], "question": "which entity has a bench with flowers?", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", " a wooden staircase with a railing and table."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "956247bea850458199c651037d4b1d7f"], "properties": ["room, bed, desk", "railing, table, staircase"], "captions_pred_pc": ["a black and white drawing of a door", "above a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a 3d model of a table with a staircase"], "question": "which entity has a table?", "label": 1}, {"captions": [" a house with a pink roof, brick walls, and insulated ceiling.", " a small wooden house."], "sample_ids": ["c8936ace72954650b4e2d84246964849", "4cb4dba1237443eb8dc299530fa12521"], "properties": ["roof, color, 
pink", "house, material, wood"], "captions_pred_pc": ["a black and white drawing of a toilet", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a small cottage"], "question": "which house is made of wood", "label": 1}, {"captions": [" of a small wooden house with two roofs.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["30fc23ae4edb42609e30e029dede54bd", "c3a82df41875402285608ef13a55df57"], "properties": ["house, roof, wooden", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["of a pair of stainless steel screws on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a small barn", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": 
[" a two-story small apartment building with a roof.", " a house with a wooden-framed roof structure."], "sample_ids": ["8d1102e923954604ae7045a7ca14c1f6", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["two-story, roof, building", "roof, material, wood"], "captions_pred_pc": ["a black and white pattern of dots in the shape of the letter c on a white background vector illustration of a black and white pattern of dots in the shape of the letter c on a white background royalty free illustration", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no 2", "a 3d model of a building with a roof"], "question": "which building has a roof made of wood", "label": 1}, {"captions": [" a white rock formation with pebbles and ice elements.", " a small house on a hill in a field."], "sample_ids": ["94d94f5a75de4bd0a43b08609630876e", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["color, shape, texture", "house, hill, field"], "captions_pred_pc": ["a black and white drawing of a pair of shoes on a white background royalty free illustration", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d model of a small rocky island in the middle of a body of water", "a black and white image of a small house"], "question": "which entity is a building?", "label": 1}, {"captions": ["a featuring a teddy bear on a rainbow, accompanied by a baby, a cat, a dog, and a drink, with a pink and green sign displaying \"close the sky over dreams.\"", " a wooden table and bench with a deer head and branch on it."], "sample_ids": ["80dfbe37b3d74f11b712ca1ad6570f70", "857d5391612349f4ae6cd854a1ec96de"], "properties": ["image, color, pink", "table, bench, deer"], "captions_pred_pc": ["above a black and white photograph of a dog in a bowl", "a black and white drawing of a table and chairs"], "captions_pred_image": ["a 3d sculpture of an animal on a piece of paper", "a black and white 
image of a bench and table with a deer's head on the table"], "question": "which image has a deer head on it?", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " a small white house with windows and a black lid."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "4b40af369c1149949f5ccb68becd8430"], "properties": ["a box, a cup, a bottle, a jar", "white, windows, lid"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "above a black and white image of dots on a white background"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d model of a white house with three windows"], "question": "which object has a lid", "label": 1}, {"captions": ["a featuring white and red cubes, and a pink and white chair.", " a building with yellow lines."], "sample_ids": ["f2c44a82ba744ba8b93e9a1c2272c117", "f18e34286cf54876874f55ecc9018492"], "properties": ["color, white, red, pink", "color, yellow, lines"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white drawing of a map"], "captions_pred_image": ["a 3d model of a white structure with stairs", "a drawing of an airplane flying over a city"], "question": "which entity has more lines", "label": 1}, {"captions": [" a modern office building with a green door, green roof, windows, and blue lights.", " a house with a flat roof structure."], "sample_ids": ["d6087023095446fbadef1721478373b2", "abc52d210d71415296730bb00352ce6f"], "properties": ["door, roof, window", "roof, flat, structure"], "captions_pred_pc": ["a black and white drawing of a toilet brush", "a black and white drawing of a window with dots around it"], "captions_pred_image": ["a 3d model of an apartment building", "a 3d model of a house with a roof"], "question": "which building has a flat roof", "label": 1}, {"captions": [" a destroyed car with rusted, broken metal and torn paper.", 
" a four-legged metal workbench with shelves."], "sample_ids": ["3fe31c3bf5cd4574a8ca02222411a988", "e93b633d477942d9b79ef8ab566473d6"], "properties": ["metal, rusted, paper", "Four legs, Metal, Shelf"], "captions_pred_pc": ["a black and white drawing of a person sitting in a chair", "for a black and white illustration of a cross"], "captions_pred_image": ["a black and white image of a piece of debris on the ground", "a 3d model of a table with four legs"], "question": "which object is made of metal", "label": 1}, {"captions": [" a building featuring graffiti, chinese writing, a door, a broken window, and an interior bathroom with a sink.", " an old building with windows, doors, and a balcony on a street."], "sample_ids": ["1ee3df6f94ea4c329a9c5245634e34d5", "706fb93f885d42f594e0ebbba632d2f2"], "properties": ["graffiti, chinese writing, door", "building, balcony, street"], "captions_pred_pc": ["a black and white illustration of a bridge with dots", "in 15 words or less a black ink brush stroke on a white background"], "captions_pred_image": ["a black and white image of a bathroom with a sink and a toilet", "a 3d model of an old building"], "question": "which building has a balcony", "label": 1}, {"captions": [" a small house with a blue roof.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["fa21afd3a99d448cb23fa527a784769c", "97e000ff41094665afd94ea565da8b13"], "properties": ["roof, color, blue", "roof, material, wood"], "captions_pred_pc": ["a house made of dots on a white background a house made of dots on a white background royalty free illustration", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a house with a porch and balcony royalty-free 3d model preview no.2", "a 3d model of the roof of a building"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a snowy small village with farm buildings and a fence.", " a building structure featuring yellow and white 
poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["6bb669534ccc434f9ab4d7b39bae3510", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["building, fence, snowy", "roof, color, yellow"], "captions_pred_pc": ["a black and white drawing of a boat on the water", "a black and white drawing of a room"], "captions_pred_image": ["a 3d model of a small village in the snow royalty free 3d model preview no. 3", "a 3d model of a table and chairs on a white background"], "question": "which building has a roof that is the color of yellow", "label": 1}, {"captions": [" a small rocky island with a brown and white rug featuring a puddle on it.", " of a small island featuring a white lighthouse, a fountain, and a grassy crater."], "sample_ids": ["4806b382466247ad9265fc8240a22d3d", "2a30e69498ff4fd1a33c1fb72286f553"], "properties": ["rocky, rug, puddle", "lighthouse, fountain, crater"], "captions_pred_pc": ["a black and white image of a long, curved line on a white background", "a black beanie with sparkles on a white background"], "captions_pred_image": ["a 3d image of a rug on the ground, with a small puddle in the center royalty free 3d model preview no. 
1", "a black and white image of an object on top of a pedestal"], "question": "which island has a fountain?", "label": 1}, {"captions": [" a green electrical utility box with a warning sign and a small blue box on a concrete base.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["8e39d4766ed4444ea527d6c5ea33a5ef", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, base, warning", "island, terrain, water"], "captions_pred_pc": ["a black and white illustration of a box with dots on it", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of an electrical box royalty-free 3d model preview no.2", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a car dealership interior featuring a showroom, repair shop, and various elements like a booth, bed, and ceiling light.", " a house with a wooden-framed roof structure."], "sample_ids": ["3e22efacf9ee40a1a6b2e4b72a7314d2", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["a, booth, bed", "roof, material, wood"], "captions_pred_pc": ["a black and white drawing of a tv screen with dots all over it royalty free illustration", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d rendering of a room with a black and white color scheme", "a 3d model of a building with a roof"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": ["a featuring a train, large ship, long metal pipe, boat, black map of kenya, and a black piece of plastic.", "a featuring a boat, table, chairs, umbrella, and solar panel."], "sample_ids": ["49c5a9d42ba64fb2b681ef583d700b98", "0f0eb3a198d341d28f809b6d7634be8a"], "properties": ["a train, a ship, a boat", "boat, table, chairs, umbrella, solar panel"], "captions_pred_pc": ["above a black and white image of a long, curved line on a white background", "a black and white illustration of a boat 
with an umbrella"], "captions_pred_image": ["a 3d model of a submarine", "a 3d model of a boat, a table, chairs, and an umbrella"], "question": "which entity has a solar panel", "label": 1}, {"captions": [" of a sword with a wooden handle.", " of a hammer with a long metal handle."], "sample_ids": ["bf448dbb4b6a43d89b2514929e8f7c43", "30f4b6bcbbb44f568cab4fd439d05145"], "properties": ["handle, material, wood", "handle, metal, long"], "captions_pred_pc": ["a black and white image of a sword on a white background", "a black and white toothbrush on a white background"], "captions_pred_image": ["a black and white image of a sword with two blades", "a hammer with a wooden handle and metal head"], "question": "which handle is made of metal", "label": 1}, {"captions": [" of a house with a pink roof.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["6162909df6294848a8eea83c3aa9585b", "6b745457e06840119058883b35f78f58"], "properties": ["color, roof, pink", "roof, color, blue"], "captions_pred_pc": ["a black and white drawing of the letter 'p' on a white background illustration", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a house in the style of the 1920s and 1930s", "a 3d model of a house with a steeple on top"], "question": "which house has a blue roof", "label": 1}, {"captions": ["a featuring a snake, an eel, a cube, a sword, a dice, a stick, and a blue ball.", " a house with a roof and beams."], "sample_ids": ["7940570f74c14baa83ebf5f50cd2a720", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["a snake, an eel, a cube, a sword, a dice, a stick, a blue ball", "roof, beams, house"], "captions_pred_pc": ["a black mascara brush on a white background", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife 
and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife", "a 3d model of a building with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" of a cactus bunny planter with green leaves in a white bowl.", " of a toy mushroom character with a white and brown head."], "sample_ids": ["03614cc7ab6943e5857f17c5814da146", "ae8a73809d4647c09cc82f403e47de1d"], "properties": ["color, bowl, green", "color, head, white and brown"], "captions_pred_pc": ["a black and white illustration of a dandelion in the shape of a dandelion on a white background illustration of a black and white illustration of a dandelion in the shape of a dandelion on a white background royalty free illustration", "a black and white illustration of a butterfly sitting on a dandelion stock illustration"], "captions_pred_image": ["3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a 
cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3", "a 3d model of a gray and white cartoon character"], "question": "which entity has a white and brown head?", "label": 1}, {"captions": [" of a pillow featuring a hot dog shape and a bag of chips with a dragon design.", " a clay pot with holes in it."], "sample_ids": ["4ae6ac813d584d12a5d5d608a595bfe5", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["shape is hot dog, design is dragon, color is black", "hole, material, clay"], "captions_pred_pc": ["a black and white illustration of an eye with dots", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a black and white photo of a pillow with a soda can on it", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", " a multicolored cube representing a protein, featuring pink, yellow, red, and green hues."], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "ee7c3113f2754f9cbe8980b1b7cc4eff"], "properties": ["resembles, toy, bookshelf", "color, shape, color"], "captions_pred_pc": ["a black and white image of a book cover", "a black and white pattern of small dots on a white background a black and white pattern of small dots on a white background illustration"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "a 3d model of a piece of fabric"], "question": "which entity is a cube?", "label": 1}, {"captions": [" of a black and white coffee table with a laptop, featuring a performance studio and ceiling grid structure.", " a large orange pumpkin."], "sample_ids": 
["a177693cc8c7428292680816001b48c6", "684df453535b4ec28c4d5b64dcd60f59"], "properties": ["black, white, laptop", "orange, large, pumpkin"], "captions_pred_pc": ["a black and white drawing of a room with dots on the floor", "a black and white circular pattern on a white background a black and white circular pattern on a white background royalty free illustration"], "captions_pred_image": ["a dishwasher with a dish inside it", "a 3d model of a pumpkin on a white background"], "question": "which object is orange?", "label": 1}, {"captions": ["3d white geometric shapes and paper clip.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["8760487af6c748ac8111f09113a77b16", "c3a82df41875402285608ef13a55df57"], "properties": ["3D, white, paper clip", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["of a black and white knife on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d printed diamond shaped object on a white background", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a red and black striped battery box resembling a radiator, with elements of black and white striped walls.", " a house with a roof structure, featuring a brick wall and suspended box."], "sample_ids": ["6d4cb53d5953447aaf3c44872cd3ae6f", "1a7bfcf3755142bab90d3d7cb02d0f2c"], "properties": ["color, shape, material", "roof, structure, wall"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench royalty free illustration", "a black and white illustration of a group of dots on a white background"], "captions_pred_image": ["a 3d rendering of a black and white building", "a 3d model of a building with a roof"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" of a white plastic tube with a hole and a chip on it.", " a gray, metal pillar/cylinder."], "sample_ids": 
["9968e06a62e8487ea33460e640abc573", "11391e6bab574dc0be8f2440fbc3b724"], "properties": ["color is white, material is plastic, shape is tube", "color is gray, material is metal, shape is cylinder"], "captions_pred_pc": ["a black and white image of a broom on a stand", "of a black candle on a white background"], "captions_pred_image": ["a white object on a gray background", "a 3d model of a candlestick"], "question": "which object is made of metal", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["bed, desk, window", "hat, skull, bread"], "captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a black and white image of a stone sculpture"], "question": "which entity has a skull?", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", "a low poly of a plant on a white object, resembling a paper or plastic bag."], "sample_ids": ["7907916e8f524df5b16eb566497db83e", "d49d8ed244094349a99e4faca05e0690"], "properties": ["color, roof, tray", "low poly, plant, white"], "captions_pred_pc": ["a black and white image of a metal object", "a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of a plant growing out of a rock"], "question": "which object is white", "label": 1}, {"captions": ["a white ornate airplane design.", " a house with a green, wooden-structured roof."], "sample_ids": ["23ec5bf5dd154e4a9df3194da7b8267a", "d58bb529b1434809a64b62f1b2899c3f"], 
"properties": ["color, shape, material", "roof, color, green"], "captions_pred_pc": ["of a black and white image of an ornate door knocker", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a 3d model of a decorative ornament on a white background", "a 3d model of a house with a triangular roof"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a collection featuring various characters, including a man holding a bat, a person doing yoga, a jumping individual, a person with a stick, a flying bird, a man with outstretched arms, a man in a hat, and a suited man flying through the air.", " of an egyptian sarcophagus."], "sample_ids": ["5409e95467e54cbdaddb17695fe563e6", "70aa484af2ab44149a608dd81a6ff459"], "properties": ["a, character, person", "sarcophagus, material, wood"], "captions_pred_pc": ["a 3d model of a person", "a black and white circular pattern on a white background"], "captions_pred_image": ["a 3d model of a bird in mid-flight royalty free 3d model preview no. 
3", "a black and white photograph of a sphere with egyptian hieroglyphics on it"], "question": "which object is made of wood", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", "modern tan leather lounge chair."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "409916a53a0d434599e2a3f52bfe9396"], "properties": ["color, shape, and size", "color, tan, leather"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a black and white illustration of an object in the shape of a butterfly on a white background 3d illustration of a black and white illustration of an object in the shape of a butterfly on a white background vector illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustr"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a gray leather lounge chair with a metal base"], "question": "which entity is made of leather", "label": 1}, {"captions": [" a black and white cube-shaped building with a staircase.", "a 3d white cube featuring a hole, wheels, and a diamond."], "sample_ids": 
["587e65f2d904440488a98dfa9a4e9dbe", "e44009d33258425e8efedfbc6823bf70"], "properties": ["shape is cube, color is black, white", "- color is white- shape is cube- material is plastic"], "captions_pred_pc": ["above a black and white photograph of a sculpture", "for a black and white image of a toothbrush in the shape of a toothbrush"], "captions_pred_image": ["a black and white 3d model of a building", "a 3d model of a white cube"], "question": "which object is white", "label": 1}, {"captions": [" the white text \"ekberkaslan\" with a row of white cubes and a numbered box.", " of an egyptian temple featuring a pyramid, sphinx, and a building with a roof."], "sample_ids": ["87ee30d475f34b799c24bf7ef3a7b540", "40022305eaf44442915c9db03e25f781"], "properties": ["- color is white- shape is cubes- number is 1", "image is a egyptian temple featuring a pyramid, sphinx, and a building with a roof"], "captions_pred_pc": ["a close up of a black and white striped scarf", "above a black and white image of an abstract design"], "captions_pred_image": ["a 3d image of the word ebercaskalan on a white background", "a 3d model of an ancient temple"], "question": "which entity is a building?", "label": 0}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["roof, color, blue", "house, table, chair"], "captions_pred_pc": ["of a white lace clutch purse on a white background", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of a building with many windows", "a 3d rendering of a small white table with a chair"], "question": "which house has a table and chair?", "label": 1}, {"captions": ["a red circle, red and white arrow, mouse, and child's handwriting.", " a large metal building with a roof and truss 
structure."], "sample_ids": ["3c233f87bf264968a7f0660b9eac9e4a", "b85a99699ccd4bcba213322113bb253d"], "properties": ["red, mouse, handwriting", "roof, truss, structure"], "captions_pred_pc": ["in 15 words or less a black and white image of a person's hand holding a pencil and drawing on a piece of paper", "of a metal grate on a white background"], "captions_pred_image": ["a black and white drawing of a person's hand holding a pencil", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a wooden house with a roof and framing.", " a small house on a hill in a field."], "sample_ids": ["4634a9bdf54549a99f68be77f1464b0a", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["roof, framing, material", "house, hill, field"], "captions_pred_pc": ["a black and white drawing of an abstract pattern", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d model of a barn structure", "a black and white image of a small house"], "question": "which house is on a hill?", "label": 1}, {"captions": [" a white and black chair with a black handle and armrest.", " a large metal building with a roof and truss structure."], "sample_ids": ["cf1f435c54b046f68d6603cd3369a94f", "b85a99699ccd4bcba213322113bb253d"], "properties": ["color, black, white, handle, armrest", "roof, truss, structure"], "captions_pred_pc": ["a black and white drawing of a geometric shape", "of a metal grate on a white background"], "captions_pred_image": ["a 3d rendering of a white chair with a black handle", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot.", " of a torn piece of paper, a hole in a wall, and a guitar with a sign on it, accompanied by a piece of metal, wood, and a large rock."], "sample_ids": ["34ebe81ae93841ca829efd15aee4d8c1", 
"cc4ccf85d4c1425cb5975b8b5664d38a"], "properties": ["moss, mushroom, grass", "paper, hole, sign"], "captions_pred_pc": ["for a black and white illustration of a cloud on a white background", "a silhouette of a map of the state of karnataka, india on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor", "an image of a torn piece of paper in the shape of a map"], "question": "which entity has a sign on it?", "label": 1}, {"captions": [" a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead.", " of two rocks with ice elements."], "sample_ids": ["a2354f13774340d392fbf33564934aab", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["building, roof, yellow", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white image of a cell phone", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d rendering of a machine with a conveyor belt", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a yellow and white structure with yellow poles.", "a pixelated-textured purple sphere ."], "sample_ids": 
["bada91e216fd486d9e3e356d48978f33", "fb68393941804e769d5c9b372864a642"], "properties": ["color, shape, poles", "texture, color, shape"], "captions_pred_pc": ["a black and white drawing of a dotted pattern on a white background a black and white drawing of a dotted pattern on a white background royalty free illustration", "a black and white square made of dots on a white background"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d model of a gray sphere on a white background"], "question": "which entity is a sphere?", "label": 1}, {"captions": [" a destroyed building and a damaged yellow-green machine.", " a destroyed house and plane amidst a town with buildings."], "sample_ids": ["01406e7034fe4b7da32494c6cbf260f2", "0fd3ddca09194b8f94ef731af3b64a08"], "properties": ["building, color, yellow-green", "house, plane, town"], "captions_pred_pc": ["a black and white photograph of a piece of paper", "above a black and white drawing of a piece of paper"], "captions_pred_image": ["a 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged", "a 3d model of a damaged building"], "question": "which entity has a plane?", "label": 1}, {"captions": [" a small island with trees, water, and a river.", " a house with a purple roof and glass block structure."], "sample_ids": ["5a9c593092c04deaa0f17a1c28a79476", 
"e8ac7de076e54f07ace1a0ead07f6f57"], "properties": ["water, river, island", "roof, color, purple"], "captions_pred_pc": ["in 15 words or less a black and white polka dot pattern on a white background", "a black and white image of a fire hydrant on a white background fire hydrant on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a snowy landscape with trees in the foreground royalty free 3d model preview no.2", "a 3d model of a building with a glass roof"], "question": "which entity has a roof that is the color purple", "label": 1}, {"captions": [" a small house with stairs and a balcony.", " a wooden staircase with a railing and table."], "sample_ids": ["0fbc5f16d301450c820b1f2158fd4f69", "956247bea850458199c651037d4b1d7f"], "properties": ["balcony, stairs, house", "railing, table, staircase"], "captions_pred_pc": ["a black and white image of a square with dots on it", "above a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a building with two floors and a balcony", "a 3d model of a table with a staircase"], "question": "which staircase has a table", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", "a white of a man with arms outstretched."], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["resembles, toy, bookshelf", "image, color, white"], "captions_pred_pc": ["a black and white image of a book cover", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "a 3d model of a man with his arms outstretched"], "question": "which image is white", "label": 1}, {"captions": [" a small bedroom with wooden floors, walls, roof, and shelf.", " a house with a roof and beams."], "sample_ids": ["e602ac60041f4b4f84c044161e478781", 
"c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["floor, wall, roof", "roof, beams, house"], "captions_pred_pc": ["above a black and white image of a decorative metal bar", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a room with wooden walls and a rug on the floor", "a 3d model of a building with a roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a black flat screen lcd monitor on a stand.", " of a toy mushroom character with a white and brown head."], "sample_ids": ["0cc63371c12344e892d1c7be5a9eb782", "ae8a73809d4647c09cc82f403e47de1d"], "properties": ["screen, monitor, stand", "color, head, white and brown"], "captions_pred_pc": ["a black and white close-up of a television on a stand", "a black and white illustration of a butterfly sitting on a dandelion stock illustration"], "captions_pred_image": ["a 3d model of a computer monitor royalty free 3d model preview no. 
2", "a 3d model of a gray and white cartoon character"], "question": "which entity has a white and brown head", "label": 1}, {"captions": [" of a white spiral, earbuds, ceiling light, and silver ring with black and white scissors and design.", " a small house on an island with a boat and a bird on a rock."], "sample_ids": ["c69f60b389124ad9b4f81c64ec332054", "1e56a92a0ddc41e59694bd1ad1656149"], "properties": ["earbuds, light, ring", "house, rock, bird"], "captions_pred_pc": ["a black and white drawing of a needle and thread", "a black and white drawing of a boat in the middle of a body of water"], "captions_pred_image": ["a black and white illustration of a pair of sunglasses and a pair of scissors next to each other on a white background", "a 3d rendering of a house on a rock"], "question": "which entity has a bird on a rock?", "label": 1}, {"captions": ["a 3d object featuring a chair with a glass top and a newspaper, a table with a glass top, and a white and silver ceiling light fixture.", "two white spheres in a ."], "sample_ids": ["88c6f2a973c449b8bf45b10a569e0e21", "1c02212a35134545ab63ab180d629c31"], "properties": ["a, chair, table, light", "two, spheres, white, in, a"], "captions_pred_pc": ["a black and white illustration of a person riding a bicycle", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a 
spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape of a spiral staircase 3d model of a chair in the shape", "a 3d model of a white ball on a gray background"], "question": "which object has a glass top", "label": 0}, {"captions": [" a white room featuring a round table and the words \"may lin\" on various elements.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["459f89a281bc456d8a6aeb1e8fe25e8a", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, room, table", "island, terrain, water"], "captions_pred_pc": ["a black square on a white background", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a white room with a round table in the center", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a cityscape featuring various buildings, trees, an airport runway, and a plane.", " a large building featuring stairs, a clock tower, a balcony, and a roof."], "sample_ids": ["9b45036d3f0342a78db9a25938dc77fc", "e7c78316f9cb4b8aad57a9c933f5278b"], "properties": ["building, tree, plane", "building, roof, balcony"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake in the shape of a snowflake royalty free illustration", "a black and white illustration of a group of dots in the shape of a square on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of an industrial area with buildings and trucks in the foreground", "a 3d model of a building with a clock tower"], "question": "which building has a roof", "label": 1}, {"captions": ["green toy sand bucket and shovel with a squirt gun.", " a flying bird, resembling a crow and a pigeon."], "sample_ids": ["ae173b4afc4d4b0499f1e4e55d647c06", "5ec78c8b6ab54f739adb0b46d216a454"], "properties": ["color, green, squirt gun", "bird, 
resembles, crow, pigeon"], "captions_pred_pc": ["a bucket with a sponge and a sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in", "above a black and white illustration of an airplane on a white background"], "captions_pred_image": ["a 3d model of a bucket and shovel royalty free 3d model preview no.2", "a black and white image of a bird in flight"], "question": "which entity is a bird?", "label": 1}, {"captions": [" a white building, table, and various piles of paper, including a low-poly object.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], "sample_ids": ["515210fb031f4ec89021ee8ce9e432e9", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["- building is white, table is white, piles of paper are white", "building, room, sky"], "captions_pred_pc": ["a black and white drawing of a piece of paper", "above a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a 3d model of a snowy landscape", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a building", "label": 1}, {"captions": ["a 3d lego character wearing a black helmet, gloves, and holding a gun.", " a rusted metal barrel with a yellow and red warning sign and stripe on it."], "sample_ids": 
["f71842519d2f44ceb9fa57e0c0db4aa0", "5a49ad82ef7a4d33badea2261720f518"], "properties": ["- color is black- material is plastic- shape is 3d", "rusty, warning, metal"], "captions_pred_pc": ["a sculpture in the form of a skull made of silver dots on a white background stock illustration \u00a9 iStock/Getty Images", "a black and white drawing of dots on a white background a black and white drawing of dots on a white background royalty free illustration"], "captions_pred_image": ["a lego ninja turtle wearing a black helmet and gloves", "a black and white photograph of a barrel"], "question": "which object is made of metal", "label": 1}, {"captions": ["a featuring a mossy rock, a piece of wood, a rocky island, a plane, a tree branch, a gray rock, a rocky mountain, and a moss-covered tree branch.", " a four-legged metal workbench with shelves."], "sample_ids": ["09e5288a9e98421985ee6e0042b3c325", "e93b633d477942d9b79ef8ab566473d6"], "properties": ["mossy, rock, rocky", "Four legs, Metal, Shelf"], "captions_pred_pc": ["a black and white illustration of a small island in the middle of a body of water", "for a black and white illustration of a cross"], "captions_pred_image": ["a 3d model of the comet 67p/churyumov-gerasimenko", "a 3d model of a table with four legs"], "question": "which object has four legs", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a house with a green, wooden-structured roof."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["color, white, black, white", "roof, color, green"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a 3d illustration of a black and 
white cow's head on a white background", "a 3d model of a house with a triangular roof"], "question": "which object has a green roof", "label": 1}, {"captions": ["a 3d low-poly helmet model with green and beige colors and wings.", "a small white 3d boat model with a curved wing and knife."], "sample_ids": ["b9f40c80d70e432390780273137dcbc0", "cafc467aff2643e8b70149c4944263ee"], "properties": ["color, material, texture", "wing, knife, boat"], "captions_pred_pc": ["a black and white image of a spider's head", "a black umbrella on a white background"], "captions_pred_image": ["a 3d model of a futuristic helmet with wings", "a 3d model of a rowing boat royalty free 3d model preview no 3"], "question": "which entity has a wing", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["color, table, ceiling", "- material is wood, rusty, horned"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "3d model of a plague doctor's mask"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a small white barn with a metal roof.", " a building featuring yellow columns, a yellow roof, and a wooden structure."], "sample_ids": ["4ca3342a96824684845f7d0e062ab176", "0ce6a4102f4f40e2a0084938b0a93941"], "properties": ["roof, metal, white", "structure, columns, roof"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a house made of dots", "a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a barn", "a 3d model of a building with 
multiple levels"], "question": "which building has a wooden structure", "label": 1}, {"captions": [" a house featuring a purple roof and a suspended ceiling with a light fixture.", " a house with a green roof and lawn."], "sample_ids": ["579b43057ef74bd08d06bdd3e8d973a0", "84adcf124ee742a49f7b1fe2104e072d"], "properties": ["roof, purple, suspended", "roof, green, lawn"], "captions_pred_pc": ["a black and white image of a piece of paper with a pattern of dots on it", "a black and white image of a knife on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of the roof of a building royalty free 3d model no.", "a 3d model of a small apartment building royalty free 3d model preview no.2"], "question": "which roof is green", "label": 1}, {"captions": [" a row of houses featuring roof structures with green roofs and tiled roof slats.", " of a house with a roof truss, chimney, and suspended ceiling."], "sample_ids": ["aef9b23a78a7450286a961cc13448d00", "9401dfc901b2447a9c0eb27da56854d7"], "properties": ["roof, green, tiled", "roof truss, chimney, suspended ceiling"], "captions_pred_pc": ["of a black and white photo of a decorative metal wall hanging", "in 15 words or less a black and white illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white 
background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a"], "captions_pred_image": ["a 3d model of a set of stainless steel shelves", "a 3d model of a house with a roof"], "question": "which house has a roof truss", "label": 1}, {"captions": [" a house with a roof structure, featuring a brick wall and suspended box.", " a house featuring a roof with truss system, framing, insulation, and a ceiling light."], "sample_ids": ["1a7bfcf3755142bab90d3d7cb02d0f2c", "39876e69e3914d99a07e0dc59611c5c0"], "properties": ["roof, structure, wall", "roof, truss system, framing"], "captions_pred_pc": ["a black and white illustration of a group of dots on a white background", "a black and white drawing of a window with dots all over it"], "captions_pred_image": ["a 3d model of a building with a roof", "a 3d model of the roof of a house"], "question": "which roof is more complex", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a house with a roof structure and toothbrushes."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "7632d1ba4e8144c19484c263b6074d0c"], "properties": ["resembles, octopus, squid, spider, robot", "house, roof, toothbrushes"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "a black and white illustration of the letter 'b' isolated on a white background illustration"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a 3d rendering of a white box with a lot of blades"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" a house with roof trusses and wooden beams on a suspended ceiling.", " various vehicles, including a car, truck, monster truck, and military vehicle, 
along with a stuffed animal and a lizard on a rock."], "sample_ids": ["3c2e3a3670b042069bd8290e2c357702", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["roof trusses, beams, suspended ceiling", "lizard, rock, stuffed animal"], "captions_pred_pc": ["above a black and white drawing of a building", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house with a roof in progress royalty free 3d model preview no. 1", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": [" a car dealership interior featuring a showroom, repair shop, and various elements like a booth, bed, and ceiling light.", " a wooden billiard table with legs."], "sample_ids": ["3e22efacf9ee40a1a6b2e4b72a7314d2", "b20ad62516fa467ba6e8de063998e8e4"], "properties": ["a, booth, bed", "legs, material, wood"], "captions_pred_pc": ["a black and white drawing of a tv screen with dots all over it royalty free illustration", "a black and white drawing of a rectangular shaped object"], "captions_pred_image": ["a 3d rendering of a room with a black and white color scheme", "a black and white image of a pool table"], "question": "which object has legs", "label": 1}, {"captions": [" of a small blue and green gazebo with a table and chairs.", " a small house on an island with a boat and a bird on a rock."], "sample_ids": ["0a3d553ed5d54c9794494af4f7a7e1c6", "1e56a92a0ddc41e59694bd1ad1656149"], "properties": ["color, gazebo, table", "house, rock, bird"], "captions_pred_pc": ["a black and white illustration of a gazebo in the middle of a field of polka dots stock photography \u00a9 2018 iStock", "a black and white drawing of a boat in the middle of a body of water"], "captions_pred_image": ["a 3d model of a small gazebo with a fountain in the center", "a 3d rendering of a house on a rock"], 
"question": "which entity has a bird on a rock?", "label": 1}, {"captions": [" of a yellow bird with black eyes.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["6354119423d14471a803b77aa539b2eb", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["Eye color is black, Eyes are black, Color is yellow", "island, terrain, water"], "captions_pred_pc": ["a black and white image of a flower in the shape of a butterfly", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a bird's head with a hole in it", "a 3d image of a small island in the middle of a lake"], "question": "which entity has a rocky terrain", "label": 1}, {"captions": [" a tall glass tower featuring blue, green, and white squares.", " a small house with a blue roof, a door, and a pool."], "sample_ids": ["405d68eda26c401fbcddc7e3a457c74e", "40c52c2d278345c5b4e8d00a991271dc"], "properties": ["color, shape, height", "door, roof, pool"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "of a black and white photo of a window with fringes"], "captions_pred_image": ["a black and white photograph of a metal sculpture on a pedestal", "a 3d model of a small house"], "question": "which entity has a door?", "label": 1}, {"captions": [" a building with a roof structure, featuring a wooden truss and ceiling with a light.", " of a small white building with stairs and a lid."], "sample_ids": ["cb42ecb7a3fd4eba99f166150ecbc9a7", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["roof structure, truss, ceiling", "building, stairs, lid"], "captions_pred_pc": ["a black and white image of a stainless steel sculpture", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a barn royalty free 3d model preview no 2", "a 3d model of a white box on a gray background"], "question": "which building has a lid?", "label": 1}, 
{"captions": ["a 3d object featuring elements of a green and purple lamp, a cartoon character with a light bulb, an ice cream cone, a syringe, a light pole, a pen with a face, and a doctor's stethoscope.", " a small white bookcase-like building with stairs and a light fixture."], "sample_ids": ["42d3657ce7bc4b5dbeb7f444e089a715", "5f99eb9d1f1e4d57b5690446f832c841"], "properties": ["a lamp, a syringe, a light pole", "building, color, white"], "captions_pred_pc": ["a spoon with black dots on a white background spoon with black dots on a white background, 3d illustration royalty free stock photo", "in 15 words or less a black and white image of the letter 'f' made up of dots"], "captions_pred_image": ["a 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop", "a 3d model of a bookshelf on a white background"], "question": "which entity is a building?", "label": 1}, {"captions": [" a small house with a tree and a rock.", "a featuring a bench with flowers, a bridge, a white sculpture, a bicycle with a flower, a giraffe, and a palm tree."], "sample_ids": ["9dc392a7f6e444e5bfb720684d6f864a", "12093c89a60941e7884b252bdc05104c"], "properties": ["house, tree, rock", "giraffe, bench, flower"], "captions_pred_pc": ["in 15 words or less the image depicts a 
white square on a white background with black dots all over the square stock illustration", "a black and white drawing of a gear on a white background"], "captions_pred_image": ["a 3d model of a small house with a tree in front of it", "a 3d model of a sculpture made of sticks"], "question": "which entity has a bench with flowers?", "label": 1}, {"captions": ["a featuring a plane, a small plane, a bird, and a dragonfly all flying in the air.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["f39783d05dec49e49482c407d656e0f7", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["a, plane, small plane", "island, mountain, grass"], "captions_pred_pc": ["of a black and white photo of an airplane propeller", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d printed model of a fighter plane in the air", "a 3d image of a small island in the middle of a lake"], "question": "which entity has grass?", "label": 1}, {"captions": [" of white spheres resembling a molecule.", "a white glass beer mug."], "sample_ids": ["9d2c94d03ca745948b8cb4e8cafddb1c", "1d686cbd3e9a4c629a43088658989286"], "properties": ["color, shape, number", "color, white, glass"], "captions_pred_pc": ["of a black and white 3d model of a molecule on a white background a black and white 3d model of a molecule on a white background royalty free illustration", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a 3d sculpture of a white ball on a gray background", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": [" of a cracked, holey cheese-like stone with black markings and writing.", " a small wooden house with a green roof."], "sample_ids": ["0316725634c64f0b96ef60e7505c1b34", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["description, material, shape", "roof, color, green"], "captions_pred_pc": ["a black and white illustration of a sponge on a white 
background", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of an arrowhead on a white background royalty free 3d model preview no. 3", "a 3d model of a house with a ladder"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a large building with a roof and windows.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["32d1fbd3ee91426882290305f70021e6", "c3a82df41875402285608ef13a55df57"], "properties": ["roof, windows, building", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["of a black and white photo of a diamond buckle", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no.2", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a small white building with a door, resembling a box-shaped house.", " a small white house with windows and a black lid."], "sample_ids": ["1b5fe88d0ff149ae9d8b4eb455c5c90c", "4b40af369c1149949f5ccb68becd8430"], "properties": ["shape is box, color is white, door is present", "white, windows, lid"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "above a black and white image of dots on a white background"], "captions_pred_image": ["a 3d model of a white, open shelving unit", "a 3d model of a white house with three windows"], "question": "which house has a black lid", "label": 1}, {"captions": [" of a small island featuring a white lighthouse, a fountain, and a grassy crater.", "a low poly of a plant on a white object, resembling a paper or plastic bag."], "sample_ids": ["2a30e69498ff4fd1a33c1fb72286f553", "d49d8ed244094349a99e4faca05e0690"], "properties": ["lighthouse, fountain, crater", "low poly, plant, white"], "captions_pred_pc": ["a black beanie with sparkles on a white background", "a black 
and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of an object on top of a pedestal", "a 3d model of a plant growing out of a rock"], "question": "which object is white", "label": 1}, {"captions": [" featuring a sandbox, sand bucket, wooden blocks, water container, and a lamp made out of blocks.", " of a character wearing glasses and a hat."], "sample_ids": ["674a36147ffb47059e48abc9fa19d923", "032e7ce682ff43d5aa5ca4fd34eacf14"], "properties": ["sandbox, sand bucket, wooden blocks", "hat, glasses, character"], "captions_pred_pc": ["for a black and white photo of a basketball hoop", "in 15 words or less a silhouette of a bell on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a brick, a box, and a pile of dirt royalty free 3d model preview no. 3", "a 3d model of a person wearing sunglasses"], "question": "which entity has a hat", "label": 1}, {"captions": [" a wooden object, including a board, piece of wood, box, and shelf.", " a small white table with stairs and a ladder, featuring a black and white kitchen hood and a black square ceiling light."], "sample_ids": ["c986212445a1466ca7be7b5ac6bea729", "5565c16f297e405f9d5dbf0ebb623605"], "properties": ["wood, board, shelf", "table, stairs, ladder"], "captions_pred_pc": ["a black and white drawing of snowflakes on a white background", "above a black and white photograph of a small square in the center of the image"], "captions_pred_image": ["a 3d rendering of a piece of marble", "a 3d model of a table with a stool on top"], "question": "which object has more stairs", "label": 1}, {"captions": [" of a house with a roof.", " of a wildebeest with horns, royalty-free."], "sample_ids": ["195ce38d57164eb588d19f8bd337f36e", "f01175538f7b4ffe8e2648d772e94c5c"], "properties": ["roof, house, roof", "image size, image, royalty"], 
"captions_pred_pc": ["a black and white drawing of a toilet on a white background", "a black and white image of a dog jumping in the air"], "captions_pred_image": ["a 3d model of a small house royalty free 3d model preview no 2", "a 3d model of a wildebeest"], "question": "which image is royalty free", "label": 1}, {"captions": [" of a large gray sphere", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["05a7c0e799e34e6998b11933dc743e03", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["size, color, shape", "a room, a cake, a table"], "captions_pred_pc": ["a black and white circular pattern on a white background", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d model of the moon 3d", "a 3d rendering of a white room with various items in it"], "question": "which entity has a room?", "label": 1}, {"captions": ["a red and white 3d-rendered motorcycle with luggage.", "royalty-free of a golden gramophone with a wooden base and umbrella."], "sample_ids": ["e9c1ff9a40494ea59c2263fd3d6ea477", "90bd720f583c4130a6273f5a94f6ae69"], "properties": ["color, model, 
luggage", "image is royalty-free, gramophone, base"], "captions_pred_pc": ["a black and white image of a motorcycle on a white background", "a black and white illustration of a water droplet in the shape of a snowflake"], "captions_pred_image": ["a motorcycle with two bags on the back of it", "a 3d model of an antique gramophone"], "question": "which object has a wooden base", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a destroyed house and plane amidst a town with buildings."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "0fd3ddca09194b8f94ef731af3b64a08"], "properties": ["color, shape, and size", "house, plane, town"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "above a black and white drawing of a piece of paper"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a damaged building"], "question": "which entity is a building?", "label": 1}, {"captions": [" white crocs clogs with blue straps and holes.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["4622e88fd9264b37997671efeb73af85", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["color, white, blue", "throne, stairs, tree"], "captions_pred_pc": ["of a black and white image of a shoe with holes", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white clog shoe royalty free 3d model preview no 1", "a 3d model of a throne with a tree on it"], "question": "which entity has more stairs", "label": 1}, {"captions": [" a shattered piece of paper, resembling a broken phone and a flying newspaper.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["80d02e2b6ceb4a3a81b6b67d2d98bc0a", "bf18bfd89efd43389781050230467d58"], 
"properties": ["shattered, resembles, broken phone", "Lights, number, five"], "captions_pred_pc": ["for black ink brush strokes on a white background", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a snowboarder in mid-air over a snowy landscape", "a white chandelier with five white shades"], "question": "which entity has more lights", "label": 1}, {"captions": [" a glass bottle with liquid, ice, and a lid, resembling a salt shaker and a human figure.", " a glass bottle with liquid, ice, and a lid, resembling a salt shaker and a human figure."], "sample_ids": ["bb01282cb8b64470866260455b0b46fa", "bb01282cb8b64470866260455b0b46fa"], "properties": ["liquid, ice, lid", "liquid, ice, lid"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a dandelion on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration", "in 15 words or less a black and white illustration of a dandelion on a white 
background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration"], "captions_pred_image": ["a 3d model of a plastic bottle on a white background", "a 3d model of a plastic bottle on a white background"], "question": "which entity has a lid?", "label": 0}, {"captions": [" a red sphere, resembling a ball, balloon, or lipstick.", "a white of a man with arms outstretched."], "sample_ids": ["f09589903f6146dfb623a6c1a07c5bfa", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["red, sphere, resembles", "image, color, white"], "captions_pred_pc": ["a black and white image of an object on a white background", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a black balloon floating in the air on a gray background", "a 3d model of a man with his arms outstretched"], "question": "which image is white", "label": 1}, {"captions": [" a yellow boat/submarine with a red arrow and light.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow 
poles."], "sample_ids": ["9b9c31fe4e6b4004a4cb34176f329c04", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["color, shape, light", "roof, color, yellow"], "captions_pred_pc": ["a black and white image of a letter 'l' on a white background", "a black and white drawing of a room"], "captions_pred_image": ["a 3d rendering of a table with an object on it", "a 3d model of a table and chairs on a white background"], "question": "which entity has a yellow roof", "label": 1}, {"captions": [" a black and gold pistol.", "a white of a gun."], "sample_ids": ["402b57a5e32540b4938db1a9d3976220", "d7c12235efd1471db5b7145b63dbd11a"], "properties": ["color, black, gold", "color, white, gun"], "captions_pred_pc": ["a black and white image of a toothbrush", "of a white object on a white background"], "captions_pred_image": ["a 3d model of a gun royalty-free 3d model preview no.", "a white 3d model of a rifle on a gray background"], "question": "which gun is white", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["roof truss, insulation, suspended ceiling", "island, terrain, water"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a small house with a red roof."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "085db9059b744673b5623b5338e02196"], "properties": ["yellow, table, 
roof", "roof, red, house"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white dotted square on a white background"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a small shed in the snow"], "question": "which house has a red roof", "label": 1}, {"captions": [" of green circles with white dots on a gray background.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["af000148afb545819c8895a4a22f9afe", "c3a82df41875402285608ef13a55df57"], "properties": ["color, background, foreground", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white illustration of a group of hats", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a group of white circles on a gray background", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a small orange piano with legs.", " a rusted metal barrel with a yellow and red warning sign and stripe on it."], "sample_ids": ["d793de7c08d74414beeb8ea50f730705", "5a49ad82ef7a4d33badea2261720f518"], "properties": ["color is orange, size is small, legs", "rusty, warning, metal"], "captions_pred_pc": ["a black and white illustration of a bench", "a black and white drawing of dots on a white background a black and white drawing of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a grand piano royalty free 3d model preview no 2", "a black and white photograph of a barrel"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a black flat screen lcd monitor on a stand.", " a small white building featuring a room with a door and a white shelf."], "sample_ids": ["0cc63371c12344e892d1c7be5a9eb782", "d7b78fa9a6b64f6095b881bc619b04fe"], "properties": ["screen, monitor, stand", "room, door, shelf"], "captions_pred_pc": ["a black 
and white close-up of a television on a stand", "above a black and white illustration of a person standing in front of a door"], "captions_pred_image": ["a 3d model of a computer monitor royalty free 3d model preview no. 2", "a 3d model of an empty room"], "question": "which entity has a door?", "label": 1}, {"captions": [" a white rock with green grass and moss on it.", " a house with a green, wooden-structured roof."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["color, grass, moss", "roof, color, green"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a 3d model of a house with a triangular roof"], "question": "which entity has a green roof", "label": 1}, {"captions": [" a table with pillar-like yellow and white poles, featuring a wooden structure, large tent, glass roof, and a chandelier with numerous glass tubes.", " a wooden table with a plant on it."], "sample_ids": ["fa06167d83e54b05bdfbeeae2ca7c8a6", "3170a8fc0ebf4d71ab19c723be68987f"], "properties": ["table, structure, roof", "table, plant, wood"], "captions_pred_pc": ["a black and white image of a map with dots", "a 3d model of the molecule on a white background a 3d model of the molecule on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building with many pillars", "a side table with a potted plant sitting on top of it"], "question": "which table is made of wood", "label": 1}, {"captions": ["a 3d object featuring a white door, box, wall with hooks, plastic bucket, sheet of paper, curved wall, and paper with a hole.", " a small house on an island with a boat and a bird on a rock."], "sample_ids": ["cbbcf78010e34fa9b2e963452d081eb7", "1e56a92a0ddc41e59694bd1ad1656149"], 
"properties": ["a, box, wall", "house, rock, bird"], "captions_pred_pc": ["a black and white illustration of a flower", "a black and white drawing of a boat in the middle of a body of water"], "captions_pred_image": ["a 3d rendering of a white wall with two hooks hanging from it", "a 3d rendering of a house on a rock"], "question": "which object has a boat?", "label": 1}, {"captions": [" a house with wooden framing and trusses.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["4501794e257c4a8ba60a94757d8e93a9", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["frame, trusses, wood", "island, terrain, water"], "captions_pred_pc": ["a black and white drawing of a window", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" of a white rectangular shelf or light fixture on a gray background.", " of a wooden windmill with a red roof."], "sample_ids": ["a84221f27ed0416280f8e67563f95ed7", "2ad8fca30285483d8b7f602fa078215d"], "properties": ["background, color, white", "roof, color, red"], "captions_pred_pc": ["a black line on a white background", "a white and black image of a snowflake in the shape of a snowflake"], "captions_pred_image": ["a long white plastic strip on a gray background", "a 3d model of a windmill on a gray background"], "question": "which object has a red roof", "label": 1}, {"captions": ["a white teddy bear, cloud, skull, octopus, and bird in various positions on a gray background.", " a house with a roof structure, featuring a brick wall and suspended box."], "sample_ids": ["dd5849aced0443b1b4b38d413f7e06c4", "1a7bfcf3755142bab90d3d7cb02d0f2c"], "properties": ["background, color, white", "roof, structure, wall"], "captions_pred_pc": ["a black and white image of a cat's head", "a black and white illustration 
of a group of dots on a white background"], "captions_pred_image": ["a 3d model of an animal skull in white on a gray background", "a 3d model of a building with a roof"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" featuring a chair, table, and refrigerator.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "c3a82df41875402285608ef13a55df57"], "properties": ["chair, table, refrigerator", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a blue and green sneaker with a yellow accent and blue sole.", " a house with a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["a2b393941a564397afb75f9ae8e70276", "c8936ace72954650b4e2d84246964849"], "properties": ["color, blue, green, yellow, blue", "roof, color, pink"], "captions_pred_pc": ["a pair of sneakers on a white background", "a black and white drawing of a toilet"], "captions_pred_image": ["a 3d model of a pair of sneakers royalty free 3d model preview no 2", "a 3d model of a house with a roof"], "question": "which entity has a roof that is pink?", "label": 1}, {"captions": [" of a jacket on a mannequin with arms outstretched.", " of two rocks with ice elements."], "sample_ids": ["1dc7708fd7bd4ea1b035f4c48dbd7868", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["arm, jacket, mannequin", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white image of a woman's torso", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a woman's jacket royalty free 3d model preview no.2", "a 3d image of two rocks 
on a gray surface"], "question": "which image shows a rock with ice elements?", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " a small white building with a floor plan and ceiling light."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "1df55bb7035941cc9829aa904e2af065"], "properties": ["color, table, ceiling", "floor plan, ceiling light, color"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "a line of dots on a white background a line of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a white 3d model of a house"], "question": "which building has a floor plan?", "label": 1}, {"captions": [" of a multi-colored spooling machine with wires and circuit board, featuring a lamp and shelf with toys.", "a wooden tower made of stacked blocks with holes in them, resembling a toy castle."], "sample_ids": ["0e3f5cc16806492b948d41a748819ce3", "da8b5d21da9b4037982f29383d60b100"], "properties": ["color, shape, material", "resembles, toy, castle"], "captions_pred_pc": ["a black and white image of a decorative tile", "a black and white drawing of a pair of scissors"], "captions_pred_image": ["a 3d rendering of an electronic device on a white surface", "a 3d model of a tower made out of blocks"], "question": "which entity is made of wood", "label": 1}, {"captions": [" of a small island featuring a white lighthouse, a fountain, and a grassy crater.", " of a stone fountain with a lion statue, surrounded by an archway inspired by the arch of triumph."], "sample_ids": ["2a30e69498ff4fd1a33c1fb72286f553", "a72700696c3b44ef8101d1e71e914bc9"], "properties": ["lighthouse, fountain, crater", "lion, statue, fountain"], "captions_pred_pc": ["a black beanie with sparkles on a white background", "a black and white image of a metal object"], "captions_pred_image": ["a black and white image of 
an object on top of a pedestal", "a 3d image of a lion statue on a wall"], "question": "which fountain has a statue", "label": 1}, {"captions": [" of a white rectangular shelf or light fixture on a gray background.", " a house with a roof and beams."], "sample_ids": ["a84221f27ed0416280f8e67563f95ed7", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["background, color, white", "roof, beams, house"], "captions_pred_pc": ["a black line on a white background", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a long white plastic strip on a gray background", "a 3d model of a building with a roof"], "question": "which entity has a roof and beams", "label": 1}, {"captions": [" a small white building featuring a room with a door and a white shelf.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["d7b78fa9a6b64f6095b881bc619b04fe", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["room, door, shelf", "house, table, chair"], "captions_pred_pc": ["above a black and white illustration of a person standing in front of a door", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of an empty room", "a 3d rendering of a small white table with a chair"], "question": "which entity has a table and chair?", "label": 1}, {"captions": ["a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars.", "a featuring a building, a coin, a small black box, and a ball."], "sample_ids": ["1b3945962a4b4cda9fe939dc5d63e789", "949cf1a57aea45d18261e980b21b8c35"], "properties": ["a room, a cake, a table", "a, building, coin, ball"], "captions_pred_pc": ["a black and white illustration of an object on a white background", "a black and white illustration of a triangular shaped 
object"], "captions_pred_image": ["a 3d rendering of a white room with various items in it", "a 3d model of a box with a coin next to it"], "question": "which entity has a coin?", "label": 1}, {"captions": [" of a wooden building frame with truss and roof structure.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["1313f8185cf24f3bbd73ff4e4ddfab3e", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["frame, truss, roof", "island, terrain, water"], "captions_pred_pc": ["a black and white image of a ladder on a white background", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d rendering of a bridge over a road", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": ["a featuring a white dog, white wolf, white and pink cats, and a pink fish, all with pink eyes.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["5392e72784be41e485bf2d43cf0bee6a", "c3a82df41875402285608ef13a55df57"], "properties": ["color, white, pink, eyes", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of a bear's head", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a wolf's head on a white background", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": ["white wall lamp with a leaf design", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["7afad3ca209c4a91b5457d28bdc2b916", "a17477b445b3443189dad22f768b888b"], "properties": ["design, leaf, white", "roof, pillar, stairs"], "captions_pred_pc": ["above a black and white image of a circular object", "a black and white image of a square with dots"], "captions_pred_image": ["a white wall sconce with a leaf design and a light bulb", "a 3d model of a small building with a balcony"], 
"question": "which entity is a building?", "label": 1}, {"captions": [" a pink-framed building structure with beams and trusses.", " a building with blue metal framing and structure."], "sample_ids": ["18e392c5360146eda498c5edab25b15c", "ce40210c2a7e49dfaebbd934ccec4eca"], "properties": ["frame, beams, trusses", "color, blue, structure"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white image of dots on a white background"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d model of a building under construction"], "question": "which building has a blue metal framing and structure?", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", "a yellow gold ring with an engraved quote, \"strength and love.\""], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "c155767db07340f2813c1b3dfa8d63b9"], "properties": ["resembles, octopus, squid, spider, robot", "ring, material, gold"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "of a black bangle bracelet on a white background"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a white wedding band with the words strength in weakness engraved on it"], "question": "which object is made of gold", "label": 1}, {"captions": [" a small house with stairs and a roof.", " a large steel building with a pool."], "sample_ids": ["e9305c80010f4e3b9de9789f01a9bee5", "72eed67d8d884c819b28e2e95eb48f06"], "properties": ["roof, stairs, house", "building material, pool, steel"], "captions_pred_pc": ["above a black and white image of a square with dots all over it", "a black and white illustration of a building with a tree growing out of it"], "captions_pred_image": ["a 3d rendering of a podium on a wooden floor", "a 3d model of a concrete structure"], "question": "which building is made of steel", "label": 
1}, {"captions": [" of a white wall-mounted light switch, electrical outlet, and various furniture pieces.", "3d white playground set with slides and swings, featuring a plane, building, and dragon in the sky."], "sample_ids": ["b195bf7ba6094e1b812e4312deeeb360", "e694d53545d449319a64cceb0280c3c6"], "properties": ["light switch, electrical outlet, furniture", "3d, slide, swing"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "for a 3d model of the letter 'j'"], "captions_pred_image": ["a 3d rendering of a room with a white background", "a 3d model of a playground slide"], "question": "which entity has a plane", "label": 1}, {"captions": [" a castle on an island with a small floating house, trees, and clouds.", "a white glass beer mug."], "sample_ids": ["c4c09479570943e2845fbd4c6a450568", "1d686cbd3e9a4c629a43088658989286"], "properties": ["castle, island, house", "color, white, glass"], "captions_pred_pc": ["above a black and white illustration of a group of dots in the shape of a circle", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a 3d model of a small house on an island", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a white building with a red roof."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "1f9580be397d4f948bf53fe1d5bc5756"], "properties": ["roof, green, lawn", "color, white, roof, red"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "in 15 words or fewer a black and white drawing of a camera"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of an office building"], "question": "which building has a red roof", "label": 1}, {"captions": [" featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot.", " a small house on an 
island with a boat and a bird on a rock."], "sample_ids": ["34ebe81ae93841ca829efd15aee4d8c1", "1e56a92a0ddc41e59694bd1ad1656149"], "properties": ["moss, mushroom, grass", "house, rock, bird"], "captions_pred_pc": ["for a black and white illustration of a cloud on a white background", "a black and white drawing of a boat in the middle of a body of water"], "captions_pred_image": ["a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor", "a 3d rendering of a house on a rock"], "question": "which entity has a bird on a rock?", "label": 1}, {"captions": [" of a black flat screen lcd monitor on a stand.", "a featuring a tv, chair, desk, laptop, man, couch, computer, and building."], "sample_ids": ["0cc63371c12344e892d1c7be5a9eb782", "d4208427217343e6af1b9b4a42a2f730"], "properties": ["screen, monitor, stand", "tv, chair, laptop"], "captions_pred_pc": ["a black and white close-up of a television on a stand", "for 3d illustration of a smartphone with a qr code on the back and a qr code on the front"], "captions_pred_image": ["a 3d model of a computer monitor royalty free 3d model preview no. 
2", "a 3d model of a person sitting on a couch"], "question": "which entity has a laptop", "label": 1}, {"captions": ["a featuring a chair, a building, a glass tower, and a throne made of money, with various unique elements such as a green roof and a computer chip.", " a small black house with a green roof, resembling a shed or container."], "sample_ids": ["18d2e75f23474d7489a6d7d605dfc76d", "bdb8e4c36ccb477890fd6ae569ae305c"], "properties": ["throne, chair, building", "black, roof, green"], "captions_pred_pc": ["a black and white illustration of a person sitting on a bench", "a black and white drawing of a square with dots all over it"], "captions_pred_image": ["a 3d model of a building on top of a table", "a 3d model of a small black building"], "question": "which entity has a green roof", "label": 1}, {"captions": [" a house featuring a wooden roof truss structure and ceiling with wood beams.", " a house featuring a wooden roof truss structure and ceiling with wood beams."], "sample_ids": ["990f06da2ba4488da8371f68da6b4523", "990f06da2ba4488da8371f68da6b4523"], "properties": ["roof truss, beams, structure", "roof truss, beams, structure"], "captions_pred_pc": ["a black and white illustration of a staircase in the shape of the letter 'l'", "a black and white illustration of a staircase in the shape of the letter 'l'"], "captions_pred_image": ["a 3d model of a house with a wooden roof", "a 3d model of a house with a wooden roof"], "question": "which entity has a wooden roof truss structure and ceiling with wood beams?", "label": 0}, {"captions": ["a featuring a futuristic chair, a black cat with a sword, a person holding an umbrella, and a black and blue dragon, airplane, and helicopter.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], "sample_ids": ["4df70180f2ea400782d2e2de76063894", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["color, black, blue", 
"building, room, sky"], "captions_pred_pc": ["a 3d illustration of a girl in a dress 3d illustration of a girl in a dress, isolated on a white background royalty free stock illustration", "above a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a black and white 3d model of a person holding a sword in the shape of a paper airplane", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a room?", "label": 1}, {"captions": [" a white wooden climbing frame with swing set and ladders.", " a wooden shed with a gray roof."], "sample_ids": ["f2935306c64a479685462220e33e6f3c", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["- material is wood - color is white - height is 1.8 m", "roof, color, gray"], "captions_pred_pc": ["an illustration of a barbed wire fence", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a playground ladder set royalty free 3d model preview no.2", "a 3d model of a shed with a gray roof"], "question": "which is not made of wood", "label": 1}, {"captions": [" of a numbered rock.", "royalty-free of a golden gramophone with a wooden base and umbrella."], "sample_ids": ["837f450ac17040f3adc4baf1dda9969b", "90bd720f583c4130a6273f5a94f6ae69"], "properties": ["number, color, shape", "image is royalty-free, gramophone, base"], "captions_pred_pc": ["above a black and white drawing of a piece of fabric", "a black and white illustration of a water droplet in the shape of a snowflake"], "captions_pred_image": ["a 3d image of a rock with a number on it", "a 3d model of an antique gramophone"], "question": "which object has a wooden base", "label": 1}, {"captions": ["a 3d-printed green plastic cylinder with a hole in the middle.", " of a white human skull with broken bone elements."], "sample_ids": 
["9faa0c251d394f368f4f537ea21f977f", "6ca0f91b85464d7a845b3977351dd0b5"], "properties": ["color, material, shape", "color, white, skull"], "captions_pred_pc": ["a black and white image of a glittering object", "a black and white image of a cat's x-ray"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a 3d model of a human skull in white"], "question": "which entity is white", "label": 1}, {"captions": [" a small house with a roof and ceiling-mounted air conditioner.", " a metal building with a purple roof and cage structure."], "sample_ids": ["6965067ea9e34357a7af21a2f078fbac", "cbc10fb816034537b052e7c8fb75c4a6"], "properties": ["roof, air conditioner, house", "roof, purple, structure"], "captions_pred_pc": ["a black and white illustration of a window", "for a black and white image of a bench"], "captions_pred_image": ["a 3d rendering of a small house with a covered porch", "a 3d model of a building with a metal roof"], "question": "which building has a cage structure?", "label": 1}, {"captions": [" a set of yellow and black tools, including a magnifying glass, hexagonal keys, and screwdrivers.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["40285a60f32749a8ae38957c7b073fe8", "c3a82df41875402285608ef13a55df57"], "properties": ["color, yellow, black", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a set of three black and white keychains on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a pair of hexagonal keys and a pair of hexagonal scissors royalty free 3d model preview no 3", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a yellow and white structure featuring lamp posts, bridge, poles, pier, stairway, and hanging rods.", " a large metal building with a roof and truss structure."], "sample_ids": 
["34257a26ad2e4c6d91ef6d5cd4bd7c43", "b85a99699ccd4bcba213322113bb253d"], "properties": ["color, pier, stairway", "roof, truss, structure"], "captions_pred_pc": ["above a black and white illustration of an open door", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a long bench on a white background royalty free 3d model no.", "a 3d model of a long metal fence"], "question": "which structure has a roof and truss structure", "label": 1}, {"captions": ["3d white triangular ramp on a gray background.", " of a cherry blossom bonsai tree with pink flowers."], "sample_ids": ["5b050792c1634ddb9d1a1cc00bdb3577", "037fff0f153c41ea8b9c9392c2e2439a"], "properties": ["color, shape, background", "flower, color, pink"], "captions_pred_pc": ["a black and white illustration of a metal rod", "for a black and white illustration of a person on a skateboard"], "captions_pred_image": ["a long white sheet of paper on a gray background", "a 3d model of a bonsai tree on a pedestal"], "question": "which entity has a pink flower?", "label": 1}, {"captions": [" of a white plastic bar, resembling a shelf or towel rod, with a metal light fixture.", " a small white building with stairs and a white table."], "sample_ids": ["f84dec547a3d4710b48db11fc9fa489a", "e30374c614f54fdb90f35b96b071349d"], "properties": ["light source, fixture, color", "building, stairs, table"], "captions_pred_pc": ["a black and white photo of a small square on a white background", "above a black and white drawing of a cat sitting on top of a letter 'e'"], "captions_pred_image": ["a 3d rendering of a white object on a gray background", "a 3d model of a building with a staircase"], "question": "which entity is whiter", "label": 1}, {"captions": ["white hospital bed, medical cart, and bedside table with wheels.", " of two rocks with ice elements."], "sample_ids": ["e3ef6fd54af440efa31984214dd6f11d", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["bedside table, wheels, white", "image is 
a rock with ice elements"], "captions_pred_pc": ["a black and white illustration of a stool on wheels", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d illustration of a white medical trolley on wheels", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" orange and yellow fish, candy, and a pair of shoes.", " a house with a flat roof structure."], "sample_ids": ["0fa2a605d7e940e5946f63c0f74234f3", "abc52d210d71415296730bb00352ce6f"], "properties": ["color, shape, and material", "roof, flat, structure"], "captions_pred_pc": ["a black and white photograph of a group of plastic cups", "a black and white drawing of a window with dots around it"], "captions_pred_image": ["a 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes", "a 3d model of a house with a roof"], "question": "which entity is made of a material that is not food", "label": 1}, {"captions": [" a small white building featuring a room with a door and a white shelf.", " a white sofa, chair, and box."], 
"sample_ids": ["d7b78fa9a6b64f6095b881bc619b04fe", "4c59733ebd634594a921b7ace60e4142"], "properties": ["room, door, shelf", "sofa, chair, box"], "captions_pred_pc": ["above a black and white illustration of a person standing in front of a door", "a black and white drawing of a couch with dots"], "captions_pred_image": ["a 3d model of an empty room", "a 3d model of a white chair"], "question": "which entity has a sofa", "label": 1}, {"captions": [" a white and blue building with a black roof.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["c893118316ee43e18322e5964b2806c5", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, white, blue, roof, black", "a, material, clay"], "captions_pred_pc": ["a black and white illustration of a person standing on top of a hill made up of tiny dots", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a white building on a gray background royalty free 3d model no.", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": ["a 3d low-poly helmet model with green and beige colors and wings.", "a pixelated-textured purple sphere ."], "sample_ids": ["b9f40c80d70e432390780273137dcbc0", "fb68393941804e769d5c9b372864a642"], "properties": ["color, material, texture", "texture, color, shape"], "captions_pred_pc": ["a black and white image of a spider's head", "a black and white square made of dots on a white background"], "captions_pred_image": ["a 3d model of a futuristic helmet with wings", "a 3d model of a gray sphere on a white background"], "question": "which entity has a texture that is pixelated?", "label": 1}, {"captions": ["a low poly of a deer, antelope, llama, capybara, and kangaroo.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["8b4c2e3e76524d85a9395ea1169d953e", "bf18bfd89efd43389781050230467d58"], "properties": ["low poly, llama, kangaroo", "Lights, 
number, five"], "captions_pred_pc": ["above a black and white image of an animal sculpture", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d low poly animal standing on its hind legs", "a white chandelier with five white shades"], "question": "which entity has more lights", "label": 1}, {"captions": [" a small white barn with a metal roof.", " a small, snow-covered house."], "sample_ids": ["4ca3342a96824684845f7d0e062ab176", "0d00d10b90134dbe9ce7b2b3d6669237"], "properties": ["roof, metal, white", "house, snow, cover"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a house made of dots", "in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a barn", "a piece of broken glass on a white background"], "question": "which building is covered in snow", "label": 1}, {"captions": ["a 3d white rectangular box with multiple compartments and a white lid.", " a small house with a staircase, balcony, and wooden floor."], "sample_ids": ["966f6e9324a74d90831924895d3f2e8c", "e67e211004cb450cbaf8139dd74ba39b"], "properties": ["size, color, shape", "floor, staircase, balcony"], "captions_pred_pc": ["a black and white illustration of a patterned rug", "a black and white drawing of a wallet"], "captions_pred_image": ["a 3d rendering of a white cardboard box on a gray background", "a 3d model of a bench on a wooden floor"], "question": "which entity has a wooden floor", "label": 1}, {"captions": [" of a white supreme logo t-shirt, low poly design.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["bea8441c08d94366b96b53775391d8e6", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["color, white, logo", "lizard, rock, stuffed animal"], "captions_pred_pc": ["for a black and 
white image of a shirt with dots", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white t-shirt with a supreme logo", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": ["a white 3d printed ring with a bow, snake design, and two arms.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["1fa054d12a084b7786d1185e0dc8787c", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["color, material, shape", "box, handle, gun"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a virus on a white background stock illustration", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d printed ring with a unique design", "a 3d rendering of a metal box with a handle"], "question": "which object is made of metal", "label": 1}, {"captions": [" a white and yellow table with yellow legs and a metal roof structure.", "a white of a woman with her arms outstretched."], "sample_ids": ["a71c43af3c944bf5b6d12375f7d54811", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["color, white, yellow, roof, metal", "image, color, white"], "captions_pred_pc": ["above a black and white drawing of a floor plan", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a 3d model of a white table with multiple cubes", "a 3d model of a woman with her arms outstretched"], "question": "which entity is a picture of a woman?", "label": 1}, {"captions": [" a small white house with stairs and a wall-mounted shelf.", " a large white and metal building with a metal roof structure."], "sample_ids": ["10c4ba5b0db4490db9c00c21c94cb41f", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["house, color, white", "roof, metal, white"], "captions_pred_pc": ["above a black and white 
drawing of a bench", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a small white building", "a 3d model of a large white box"], "question": "which building has a white roof", "label": 1}, {"captions": [" a small green cannon wheel with a handle.", "a 3d low poly model of a hammer and an axe with wooden handles."], "sample_ids": ["97d13db38fa24556afa1eef04fc518e6", "c4e45a41478e42418399074b88d8920f"], "properties": ["color, green, handle", "axe, handle, wood"], "captions_pred_pc": ["of a small black object on a white background", "for a black and white image of a bird's wing"], "captions_pred_image": ["a 3d model of a cannon with wheels", "a 3d model of a knife on a white background"], "question": "which object has a handle", "label": 1}, {"captions": [" of a computer with a green screen, keyboard, and white box.", " a brick building with a roof structure and roof truss."], "sample_ids": ["fb1e5a04ef4644f98219e1d5d52ab073", "84e8acad28664a738df69d719df9e263"], "properties": ["screen, keyboard, box", "roof, structure, truss"], "captions_pred_pc": ["a box made up of many small dots on a white background a box made up of many small dots on a white background royalty free illustration", "a black and white polka dots pattern on a white background polka dots pattern on a white background illustration"], "captions_pred_image": ["a 3d model of a vintage computer royalty-free 3d model preview", "a 3d model of a brick building with a roof"], "question": "which entity has a roof structure", "label": 1}, {"captions": ["a white of a city with buildings and a gold spoon.", "a white of a city with buildings and a gold spoon."], "sample_ids": ["2351471a2d2145c59fec5f68ffae4816", "2351471a2d2145c59fec5f68ffae4816"], "properties": ["image, city, spoon", "image, city, spoon"], "captions_pred_pc": ["a black and white image of a diamond shaped piece of fabric", "a black and white image of a diamond shaped piece of fabric"], 
"captions_pred_image": ["a 3d model of a city skyline in white", "a 3d model of a city skyline in white"], "question": "which image shows a city with buildings and a gold spoon?", "label": 0}, {"captions": ["a featuring a white box with a hole, a paper clip, and a lock.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["839e143bb1684aaa955f2c3e0cf4eef2", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["box, paper clip, lock", "roof, color, yellow"], "captions_pred_pc": ["above a black and white image of a clock in the shape of a spiral", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a stapler with a staple in it royalty free 3d model no.", "a 3d model of a house with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" a house with a pink roof, truss, and a square white ceiling lamp.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["91b2e9e4660946f5b4808a18b5323b69", "bded33af34104b9686b845dfd18309a9"], "properties": ["roof, truss, lamp", "table, staircase, light"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d model of a house with a metal roof", "a 3d model of a small table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a black and green sphere with blue lights.", " a green circuit board."], "sample_ids": ["de9edcb24820415da05370c37bb908c3", "1b2cc7abaf5e4e7e9d4652163d051b16"], "properties": ["color, shape, light", "color, green, circuit board"], "captions_pred_pc": ["a black and white map of a city with dots on it royalty free illustration", "of the item 
in the image"], "captions_pred_image": ["a black and white image of the sun in the sky", "a 3d model of an electronic component"], "question": "which entity is a circuit board?", "label": 1}, {"captions": [" a house featuring a roof, floor plan, heating system, and ceiling light fixture.", " a large metal building with a roof and truss structure."], "sample_ids": ["269939560e08432f9e134309b9a1c587", "b85a99699ccd4bcba213322113bb253d"], "properties": ["floor plan, heating system, ceiling light fixture", "roof, truss, structure"], "captions_pred_pc": ["a black and white drawing of a house", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a building with a glass facade", "a 3d model of a long metal fence"], "question": "which entity has a roof", "label": 1}, {"captions": ["a featuring a metal refrigerator, a graffiti-covered sink, and a metal toilet, all with rusted elements.", " of a torn piece of paper, a hole in a wall, and a guitar with a sign on it, accompanied by a piece of metal, wood, and a large rock."], "sample_ids": ["e65de9c4ec9242679a45e74733f7d61d", "cc4ccf85d4c1425cb5975b8b5664d38a"], "properties": ["rusty, sink, graffiti", "paper, hole, sign"], "captions_pred_pc": ["a black and white drawing of a room with a door and a rug on the floor a black and white drawing of a room with a door and a rug on the floor royalty free illustration", "a silhouette of a map of the state of karnataka, india on a white background royalty free illustration"], "captions_pred_image": ["a stainless steel toilet bowl on a pedestal", "an image of a torn piece of paper in the shape of a map"], "question": "which entity has a sign on it", "label": 1}, {"captions": [" of a wooden tv stand with two drawers and handles.", " a clear glass table with metal legs and balls on top."], "sample_ids": ["6409380e790442e6a5733eb447d4c510", "7c2bfa826f274377ac21f48d510848c3"], "properties": ["Drawer, Handle, Wood", "glass, metal, balls"], "captions_pred_pc": 
["a black and white drawing of a line of dots on a white background", "a black and white image of a wine glass"], "captions_pred_image": ["a 3d rendering of a wooden entertainment center", "a clear acrylic foosball table"], "question": "which table has more metal", "label": 1}, {"captions": [" a human foot with a red, white, and pink bone structure, including a skull with red and white details.", " of two rocks with ice elements."], "sample_ids": ["39cdff793d3f4dcd898dd6b5222cb289", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["color, shape, size", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white image of an ornate design on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a human bone structure", "a 3d image of two rocks on a gray surface"], "question": "which entity is a rock?", "label": 1}, {"captions": [" of a cactus bunny planter with green leaves in a white bowl.", " a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond."], "sample_ids": ["03614cc7ab6943e5857f17c5814da146", "a452d5381dad4dc09f5ebe10635ae5fe"], "properties": ["color, bowl, green", "house, roof, green"], "captions_pred_pc": ["a black and white illustration of a dandelion in the shape of a dandelion on a white background illustration of a black and white illustration of a dandelion in the shape of a dandelion on a white background royalty free illustration", "above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black 
and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white"], "captions_pred_image": ["3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3", "a 3d model of a building with a black roof"], "question": "which entity has a green roof", "label": 1}, {"captions": [" of a white rectangular shelf or light fixture on a gray background.", " of a small white building with stairs and a lid."], "sample_ids": ["a84221f27ed0416280f8e67563f95ed7", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["background, color, white", "building, stairs, lid"], "captions_pred_pc": ["a black line on a white background", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a long white plastic strip on a gray background", "a 3d model of a white box on a gray background"], "question": "which entity has a lid?", "label": 
1}, {"captions": ["a white 3d printed mickey mouse dice with various numbers and symbols on it.", " a pink, ear-shaped object."], "sample_ids": ["e2645ac544844f3c981203134a99c30c", "0e08d777c7b948a784dc15748e7b173f"], "properties": ["- material is plastic- shape is dice- color is white", "shape is ear, color is pink, material is plastic"], "captions_pred_pc": ["a circle of dots with the number 2 in the center", "a black and white illustration of a rock on a white background"], "captions_pred_image": ["a 3d printed white dice with a mickey mouse face", "a 3d model of a white object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a purple and green sphere.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["906b2d1219804f4f9e57d4f6cfd47a83", "bf18bfd89efd43389781050230467d58"], "properties": ["color, shape, size", "Lights, number, five"], "captions_pred_pc": ["in 15 words or less a black and white image of a sphere in the shape of a light bulb royalty free illustration", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a ball with a face on it", "a white chandelier with five white shades"], "question": "which entity has more lights", "label": 1}, {"captions": [" a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond.", " a black triangular metal object with a clock and cross on it, and a small machine on top."], "sample_ids": ["a452d5381dad4dc09f5ebe10635ae5fe", "b198a81dc41c4fde8be3ca51c3b0e676"], "properties": ["house, roof, green", "metal, cross, clock"], "captions_pred_pc": ["above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black 
and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white", "above a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a building with a black roof", "a 3d model of a piece of furniture"], "question": "which object has a clock and cross on it?", "label": 1}, {"captions": [" of a small wooden house with two roofs.", " a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog."], "sample_ids": ["30fc23ae4edb42609e30e029dede54bd", "f6c6e7a65a3e42dfa431b1d984c72f28"], "properties": ["house, roof, wooden", "house, fence, dog"], "captions_pred_pc": ["of a pair of stainless steel screws on a white background", "above a black and white drawing of a bathroom"], "captions_pred_image": ["a 3d model of a small barn", "a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in 
the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a"], "question": "which house has a fence?", "label": 1}, {"captions": [" a large building featuring stairs, a clock tower, a balcony, and a roof.", " a large white and metal building with a metal roof structure."], "sample_ids": ["e7c78316f9cb4b8aad57a9c933f5278b", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["building, roof, balcony", "roof, metal, white"], "captions_pred_pc": ["a black and white illustration of a group of dots in the shape of a square on a white background royalty free illustration", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a building with a clock tower", "a 3d model of a large white box"], "question": "which building has a metal roof", "label": 1}, {"captions": [" a spiral staircase with a railing and wooden floor in a house.", " a large metal building with a roof and truss structure."], "sample_ids": ["40921ffd69db479294554d261daf3035", "b85a99699ccd4bcba213322113bb253d"], "properties": ["floor, railing, staircase", "roof, truss, structure"], "captions_pred_pc": ["above a black and white image of a computer screen", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a spiral staircase", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a robot head with a helmet.", "a pixelated eagle with a yellow and white head and yellow beak."], "sample_ids": ["55ccc07cea0640788d42448bc6559087", "9ce77b4cc05c4060a74b8f2568e6bbe8"], "properties": 
["helmet, head, robot", "eagle, head, beak"], "captions_pred_pc": ["of a silver ring with black crystals", "a black and white image of a knife on a white background"], "captions_pred_image": ["a 3d model of a bust of a robot head royalty free 3d model preview no 2", "a 3d image of a penguin in a pixelated world"], "question": "which entity has a yellow beak", "label": 1}, {"captions": [" of a white van with a logo on the side.", "a 3d white cube with windows resembling a building."], "sample_ids": ["13e79a7ee023484da1807e88e006a820", "4a07a5293f024bb0a353954a056ef626"], "properties": ["color, logo, side", "- material is white- color is white- texture is textured"], "captions_pred_pc": ["a black and white illustration of a scissors", "a black and white image of a square made up of small dots"], "captions_pred_image": ["a black and white photo of a white van", "a 3d model of a cube"], "question": "which is not a building", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", " a small wooden cabinet with a drawer, doubling as a bedside table."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "3755f3c19ae549c4bf708462db1b2581"], "properties": ["base material is wood, color is red, message is welcome to northwich", "Cabinet, drawer, wood"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a wooden box with a lid"], "question": "which object is made of wood", "label": 1}, {"captions": [" a vibrant city skyline featuring various colored buildings, trees, and skyscrapers.", " of a white human skull with broken bone elements."], "sample_ids": ["1a1fb9b0d83845f6b1238fb45e0defff", "6ca0f91b85464d7a845b3977351dd0b5"], "properties": ["color, skyline, buildings", "color, white, skull"], "captions_pred_pc": 
["a black and white illustration of a city skyline", "a black and white image of a cat's x-ray"], "captions_pred_image": ["a black and white 3d model of a city skyline", "a 3d model of a human skull in white"], "question": "which entity is white", "label": 1}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", " a metal-framed wall with red and blue bars in a steel structure."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "fefc99453e2d4406a9668d5697224c0f"], "properties": ["color, red, blue, structure", "color, red, blue, structure"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "a black and white image of a person holding a toothbrush"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d rendering of a metal frame structure"], "question": "which entity has a red and blue structure?", "label": 0}, {"captions": [" a large, black and white circular building, resembling a stadium or ring structure.", " a white building, table, and various piles of paper, including a low-poly object."], "sample_ids": ["67f46bb0048244c687a58d1017a08f6b", "515210fb031f4ec89021ee8ce9e432e9"], "properties": ["building, color, black and white", "- building is white, table is white, piles of paper are white"], "captions_pred_pc": ["the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white 
background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustr", "a black and white drawing of a piece of paper"], "captions_pred_image": ["a 3d model of a circular fence with black and white stripes", "a 3d model of a snowy landscape"], "question": "which building is white", "label": 1}, {"captions": [" a small bedroom with wooden floors, walls, roof, and shelf.", " of a beige and white round soap dish/small bowl."], "sample_ids": ["e602ac60041f4b4f84c044161e478781", "5414d75e47104589837f3df8b6de6d22"], "properties": ["floor, wall, roof", "beige, white, round"], "captions_pred_pc": ["above a black and white image of a decorative metal bar", "of a 3d model of a bracelet 3d model of a bracelet on a white background royalty free illustration 2019"], "captions_pred_image": ["a 3d model of a room with wooden walls and a rug on the floor", "a white ceramic bowl sitting on top of a gray surface"], "question": "which object is round?", "label": 1}, {"captions": [" a small triangular-shaped object.", " of two rocks with ice elements."], "sample_ids": ["2d02985030804209a26c2c53b96a06f9", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["shape, triangle, small", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white image of a piece of metal", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a black piece 
of furniture on a white background", "a 3d image of two rocks on a gray surface"], "question": "which object is a rock?", "label": 1}, {"captions": [" a large house with a roof on a platform.", "star wars stormtrooper "], "sample_ids": ["cb3e09a301b746918a682a595037c7f7", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["roof, platform, house", "a, color, white"], "captions_pred_pc": ["a black and white image of a piece of paper", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a star wars stormtrooper"], "question": "which entity is white", "label": 1}, {"captions": ["a 3d-printed green plastic cylinder with a hole in the middle.", " a wooden shed with a gray roof."], "sample_ids": ["9faa0c251d394f368f4f537ea21f977f", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["color, material, shape", "roof, color, gray"], "captions_pred_pc": ["a black and white image of a glittering object", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a 3d model of a shed with a gray roof"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a collection featuring a broken egg in a bowl, food wrapped in paper, torn and whole paper pieces, a white paper hat, a skull with a hat, a snowy iceberg, and a piece of cake.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": ["3d7bd392c9a14f4ab7c0aae2cf75b487", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["hat, food, bowl", "hat, candy, strawberry"], "captions_pred_pc": ["in 15 words or less a black and white image of dots on a white background", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d model of the earth with a hole cut out of it", "a 
black and white image of a person wearing a hat"], "question": "which entity has a bowl filled with candy?", "label": 1}, {"captions": [" of a green and yellow striped tank-like container.", " of a wine corkscrew"], "sample_ids": ["3b54b381174e4899b4b2ddc9de55eb7b", "07047b273add4f6fb2075fd176a50cd9"], "properties": ["color, shape, material", "a, type, corkscrew"], "captions_pred_pc": ["a black and white square made up of many small dots on a white background", "a black and white image of a corkscrew"], "captions_pred_image": ["a 3d model of a gray and white storage container on a white background", "a 3d model of a wine bottle opener"], "question": "which object is a type of corkscrew", "label": 1}, {"captions": ["smiley-faced banana .", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["cc0099a687194a31a052ac761f5fdfea", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["face is smiling, banana is yellow, smiley face is a sticker", "lizard, rock, stuffed animal"], "captions_pred_pc": ["above a black and white image of a surfboard on a white background", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a banana with a smiling face", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": [" a set of yellow and black tools, including a magnifying glass, hexagonal keys, and screwdrivers.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["40285a60f32749a8ae38957c7b073fe8", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["color, yellow, black", "box, handle, gun"], "captions_pred_pc": ["a set of three black and white keychains on a white background", "a black and white square with dots on a white background"], 
"captions_pred_image": ["a 3d model of a pair of hexagonal keys and a pair of hexagonal scissors royalty free 3d model preview no 3", "a 3d rendering of a metal box with a handle"], "question": "which entity is a box?", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " a house with a wooden-framed roof structure."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["color, table, ceiling", "roof, material, wood"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d model of a building with a roof"], "question": "which structure has a wooden-framed roof", "label": 1}, {"captions": ["a 3d white object featuring stacked racks, toy train, blocks, plastic pipes, lego pieces, and clothes hangers.", "s of a cat, fish, person with a hat, pig with a green hat and swimsuit, and a green and pink bird."], "sample_ids": ["c9c786f133f54d5e8d99bfc1a588df41", "402601779d1d4146b4cde106dfff1b27"], "properties": ["stacked, racks, toy train", "s, cat, fish, person, pig, bird"], "captions_pred_pc": ["a black and white photo of a person standing in front of a white wall", "above a black and white photo of a toy octopus on a white background"], "captions_pred_image": ["a 3d model of a ship's propeller on a white background", "a snowflake in the air on a cloudy day"], "question": "which entity has a cat", "label": 1}, {"captions": [" a building with yellow and white columns, wooden floor, and a ceiling featuring numerous yellow poles.", " of a crab holding a rubik's cube."], "sample_ids": ["1cf4b8f4e6014d36b6537c6ef52ccb96", "d351e74340e14ef09fb24b69dd4a6502"], "properties": ["floor, ceiling, poles", "image is a crab holding a rubik's cube"], "captions_pred_pc": ["a black and white drawing of a square with 
dots on it", "a black and white drawing of a spider on a white background"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a black and white image of a crab holding a cube"], "question": "which entity is a picture of a crab holding a rubik's cube?", "label": 1}, {"captions": [" of a white round table with a red accent.", " a large metal building with a roof and truss structure."], "sample_ids": ["fea62a865b4e40899d95785533818329", "b85a99699ccd4bcba213322113bb253d"], "properties": ["color, white, red", "roof, truss, structure"], "captions_pred_pc": ["above a black and white photograph of a person hanging from the ceiling", "of a metal grate on a white background"], "captions_pred_image": ["an empty white plate on a gray background", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" of a small wooden house with two roofs.", " a house featuring a detailed roof structure and a suspended ceiling with a map on it."], "sample_ids": ["30fc23ae4edb42609e30e029dede54bd", "ee7e6031912b46bc8ca7205a959c5c16"], "properties": ["house, roof, wooden", "roof, structure, suspended"], "captions_pred_pc": ["of a pair of stainless steel screws on a white background", "a black and white image of a piece of lace"], "captions_pred_image": ["a 3d model of a small barn", "a 3d model of a house with a metal roof"], "question": "which house has a roof that is suspended?", "label": 1}, {"captions": ["a white 3d object featuring black and white patterns, resembling a combination of a dish, smoke detector, cake, bowl, alarm clock, and ceiling light.", "a white teapot with pink flowers and a handle."], "sample_ids": ["6a8cc820f00a4cfc954d56e2b1f6206a", "f6c5e8931d164979a71914127c7e5438"], "properties": ["- material is plastic- color is white- shape is cylinder", "color, white, handle"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a plate", "a black and white dots on a 
white background"], "captions_pred_image": ["a cake with a black and white design on the top of the cake", "a white tea kettle with a black handle and floral design"], "question": "which object is whiter", "label": 1}, {"captions": [" a red and white robot.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["75e058152d8e438a9c6f8abf112e8d89", "97e000ff41094665afd94ea565da8b13"], "properties": ["color, red, white", "roof, material, wood"], "captions_pred_pc": ["a black and white drawing of a monkey wearing a hat", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a robot standing in a neutral pose royalty free 3d model preview no 3", "a 3d model of the roof of a building"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a white ceramic vase with the words 'happy' and 'fish' written on it.", "a white of a spaceship and building."], "sample_ids": ["243cd2c469984313b1522dca099eefd3", "bf7d4277c9184d35abdec85bd5e25956"], "properties": ["color, white, material, ceramic", "image, building, spaceship"], "captions_pred_pc": ["a black and white image of a vase in the shape of a fish", "a black and white drawing of a tree"], "captions_pred_image": ["a white vase on a grey background", "a 3d model of a white object on a gray background"], "question": "which is not a building", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["roof truss, insulation, suspended ceiling", "roof, trusses, beams"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 
3d model of the roof of a building"], "question": "which entity has a roof with trusses and wooden ceiling beams?", "label": 1}, {"captions": ["a white 3d printed ring with a bow, snake design, and two arms.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["1fa054d12a084b7786d1185e0dc8787c", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["color, material, shape", "torso, breasts, pattern"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a virus on a white background stock illustration", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d printed ring with a unique design", "a 3d model of a woman's chest"], "question": "which entity is a torso?", "label": 1}, {"captions": [" of a green box with four compartments", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["85599667bbcf4d7aaeaa11251be6280b", "b896a0898efe4059a776193c02132129"], "properties": ["color, shape, number", "- material is stone, metal, concrete"], "captions_pred_pc": ["a group of black dots on a white background, arranged in the shape of a sofa", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d rendering of a black box with four compartments", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a white hand, ear bud, teddy bear, earpiece, and utensil.", " a small purple plastic chair with four legs."], "sample_ids": ["1c59287d496f4da6b245a01d25a7e2a4", "1bb40ec897884b788dc0a2dac090f347"], "properties": ["earbud, earpiece, earpiece", "color is purple, material is plastic, number of legs is four"], "captions_pred_pc": ["a black and white drawing of a hand holding a cup", "of a black and white image of a baby carrier"], "captions_pred_image": ["a white object on a grey 
background", "a 3d model of a white plastic chair"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a white lighthouse building with a clock tower, rocket, and ladder.", "a white glass beer mug."], "sample_ids": ["88eba412c78a4ced89eb857327653f6c", "1d686cbd3e9a4c629a43088658989286"], "properties": ["lighthouse, building, tower", "color, white, glass"], "captions_pred_pc": ["a black and white illustration of a water droplet on a white background", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a 3d model of a white sofa on a white background", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": [" of a wooden staircase with marble floor and wooden railings in a house.", " a small house with a staircase, balcony, and wooden floor."], "sample_ids": ["ee70964ce5e841bd87381cff40d59b88", "e67e211004cb450cbaf8139dd74ba39b"], "properties": ["floor, staircase, railings", "floor, staircase, balcony"], "captions_pred_pc": ["a black and white drawing of a light switch", "a black and white drawing of a wallet"], "captions_pred_image": ["a 3d model of a staircase on a marble surface", "a 3d model of a bench on a wooden floor"], "question": "which staircase is made of wood", "label": 1}, {"captions": [" a building featuring yellow columns, a yellow roof, and a wooden structure.", "a white of a man with arms outstretched."], "sample_ids": ["0ce6a4102f4f40e2a0084938b0a93941", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["structure, columns, roof", "image, color, white"], "captions_pred_pc": ["a black and white drawing of a window", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d model of a man with his arms outstretched"], "question": "which image is white", "label": 1}, {"captions": [" a house with a roof structure, featuring a brick wall and suspended 
box.", " a white and blue building with a black roof."], "sample_ids": ["1a7bfcf3755142bab90d3d7cb02d0f2c", "c893118316ee43e18322e5964b2806c5"], "properties": ["roof, structure, wall", "color, white, blue, roof, black"], "captions_pred_pc": ["a black and white illustration of a group of dots on a white background", "a black and white illustration of a person standing on top of a hill made up of tiny dots"], "captions_pred_image": ["a 3d model of a building with a roof", "a 3d model of a white building on a gray background royalty free 3d model no."], "question": "which building has a black roof", "label": 1}, {"captions": [" a mountain range in a mountainous area with a large green field and a small arrow.", "s of a boat, bird, paper airplane, and kite flying in the air."], "sample_ids": ["e80eca95874c4bffb8aec54044824d87", "795cfa41d48a4cfc893ff1981318594d"], "properties": ["arrow, mountain, area", "s, boat, bird, airplane, kite"], "captions_pred_pc": ["a line with a square on it", "above a 3d illustration of a boy standing with his arms outstretched"], "captions_pred_image": ["a 3d model of a mountain range royalty free 3d model no.", "a white kite flying in the air against a gray background"], "question": "which entity has a boat", "label": 1}, {"captions": [" a white hospital operating room with blue containers and medical equipment.", " a house with a roof structure, featuring a brick wall and suspended box."], "sample_ids": ["a10f3196831647bc8842eacac640047d", "1a7bfcf3755142bab90d3d7cb02d0f2c"], "properties": ["color, white, containers", "roof, structure, wall"], "captions_pred_pc": ["a black and white illustration of the letter 'f' made up of tiny dots", "a black and white illustration of a group of dots on a white background"], "captions_pred_image": ["a 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model 
of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room", "a 3d model of a building with a roof"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" of a computer with a green screen, keyboard, and white box.", " a small white barn with a metal roof."], "sample_ids": ["fb1e5a04ef4644f98219e1d5d52ab073", "4ca3342a96824684845f7d0e062ab176"], "properties": ["screen, keyboard, box", "roof, metal, white"], "captions_pred_pc": ["a box made up of many small dots on a white background a box made up of many small dots on a white background royalty free illustration", "in 15 words or less a black and white illustration of a house made of dots"], "captions_pred_image": ["a 3d model of a vintage computer royalty-free 3d model preview", "a 3d model of a barn"], "question": "which object has a white roof", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["frame, roof, trusses", "house, roof, wooden"], "captions_pred_pc": ["a black and white drawing of a metal grate", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a house under construction", "a black and white photograph of a birdhouse"], 
"question": "which house has a wooden roof", "label": 0}, {"captions": ["a featuring a skeleton, torn paper, long stick, rock, and broken wood.", " a white castle composed of small cubes."], "sample_ids": ["46903bf029934b1989bc062dcb0a5531", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["skeleton, torn, paper, long stick, rock, broken wood", "composed of, white, cubes"], "captions_pred_pc": ["a close up of a black object on a white background", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d sculpture of a person's hand in the air royalty-free 3d model preview", "a 3d model of a castle made of white cubes"], "question": "which object is composed of white cubes", "label": 1}, {"captions": [" a small white box with a shelf and a hole in it.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["d023ae78bc5a436eaba13c5ecdd45c56", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["a, hole, shelf", "house, fence, playground"], "captions_pred_pc": ["a black and white drawing of a dotted square on a white background", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of a white box on a gray background", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": ["s of a plane, house, pile of rocks, and islands, accompanied by watercolor paintings of a tree, rocks, and a boat in a sand dune.", "a featuring a skeleton, torn paper, long stick, rock, and broken wood."], "sample_ids": ["8da8da6ccf5f4011a4115977c55d1cb8", "46903bf029934b1989bc062dcb0a5531"], "properties": ["s, plane, house", "skeleton, torn, paper, long stick, rock, broken wood"], "captions_pred_pc": ["a black and white silhouette of a map", "a close up of a black object on a white background"], "captions_pred_image": ["a black and white image of a small island in the middle of a 
body of water", "a 3d sculpture of a person's hand in the air royalty-free 3d model preview"], "question": "which entity has a skeleton?", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " featuring a sandbox, sand bucket, wooden blocks, water container, and a lamp made out of blocks."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "674a36147ffb47059e48abc9fa19d923"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "sandbox, sand bucket, wooden blocks"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "for a black and white photo of a basketball hoop"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d", "a 3d model of a brick, a box, and a pile of dirt royalty free 3d model preview no. 
3"], "question": "which entity has a sand bucket?", "label": 1}, {"captions": [" a leather recliner chair and ottoman set, featuring swivel functionality and available in modern orange, tan, and brown colors.", " a clear glass table with metal legs and balls on top."], "sample_ids": ["44be138ae8e2409bbbca44a96fc67d45", "7c2bfa826f274377ac21f48d510848c3"], "properties": ["color, tan, brown, orange", "glass, metal, balls"], "captions_pred_pc": ["above a black and white illustration of an office chair", "a black and white image of a wine glass"], "captions_pred_image": ["a grey leather lounge chair with ottoman and footstool", "a clear acrylic foosball table"], "question": "which object is made of metal", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "97e000ff41094665afd94ea565da8b13"], "properties": ["color, door, window", "roof, material, wood"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "a 3d model of the roof of a building"], "question": "which house has a wooden roof structure", "label": 1}, {"captions": [" of a wooden building frame with truss and roof structure.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["1313f8185cf24f3bbd73ff4e4ddfab3e", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["frame, truss, roof", "roof, trusses, beams"], "captions_pred_pc": ["a black and white image of a ladder on a white background", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a 3d rendering of a bridge over a road", "a 3d model of the roof of a building"], "question": "which entity has a roof with trusses and wooden ceiling 
beams?", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " a small house on an island with trees, shrubs, a pool, and a lake."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "c8331489fca44685bedfa1bdadf6ccb3"], "properties": ["a box, a cup, a bottle, a jar", "house, lake, pool"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white image of a pattern on a piece of paper"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d model of a large building"], "question": "which entity has a pool", "label": 1}, {"captions": [" a futuristic black and gold spaceship with a gun on it.", " a small house with a staircase, balcony, and wooden floor."], "sample_ids": ["6c34eb48b0c44667864a2af3fed92d6c", "e67e211004cb450cbaf8139dd74ba39b"], "properties": ["color, shape, gun", "floor, staircase, balcony"], "captions_pred_pc": ["above a black and white photograph of a fighter jet", "a black and white drawing of a wallet"], "captions_pred_image": ["a 3d model of a futuristic space fighter", "a 3d model of a bench on a wooden floor"], "question": "which entity has a staircase", "label": 1}, {"captions": [" a wooden table and bench with a deer head and branch on it.", "3d white playground set with slides and swings, featuring a plane, building, and dragon in the sky."], "sample_ids": ["857d5391612349f4ae6cd854a1ec96de", "e694d53545d449319a64cceb0280c3c6"], "properties": ["table, bench, deer", "3d, slide, swing"], "captions_pred_pc": ["a black and white drawing of a table and chairs", "for a 3d model of the letter 'j'"], "captions_pred_image": ["a black and white image of a bench and table with a deer's head on the table", "a 3d model of a playground slide"], "question": "which entity has a plane in the sky?", "label": 1}, {"captions": ["a small clay jug with a face, handle, and spout, depicted as a .", " a small building 
with windows and a roof."], "sample_ids": ["ceee98c20f23424195da092156905ec4", "0ef2cac27e364c0687afae7ab5040cc3"], "properties": ["face, handle, spout", "roof, windows, building"], "captions_pred_pc": ["a black and white image of a small, circular object", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a white ceramic vase with a face on it", "a 3d model of an apartment building royalty free 3d model preview no 3"], "question": "which entity has a roof", "label": 1}, {"captions": [" a multicolored metal building structure with a roof.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["22483891fd124baca3bbc6a6a49adc9c", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["color, roof, structure", "roof, trusses, beams"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a barn structure", "a 3d model of a roof structure"], "question": "which structure has a roof", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " a two-story small apartment building with a roof."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "8d1102e923954604ae7045a7ca14c1f6"], "properties": ["apse, roof, floor plan", "two-story, roof, building"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white pattern of dots in the shape of the letter c on a white background vector illustration of a black and white pattern of dots in the shape of the letter c on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of an apartment building royalty free 3d model preview no 2"], "question": "which building has a roof", "label": 1}, {"captions": [" a house featuring a green roof and red frame.", " a house 
with a roof, wooden beams, and chimney."], "sample_ids": ["00d9a408067d46afa127a404f63b4f65", "be1376023c274bdda995d54f3694157f"], "properties": ["color, roof, green, frame, red", "roof, beams, chimney"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white drawing of a bathroom with a shower"], "captions_pred_image": ["a 3d model of a building with a metal roof", "a 3d model of a house with a roof"], "question": "which house has a chimney?", "label": 1}, {"captions": [" an ice cream machine, popsicle with two sticks, and a shelf with a computer monitor.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["cb840159fea7436d81eb33bdccad3596", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["A, a, a", "house, roof, wooden"], "captions_pred_pc": ["a black and white illustration of a bench", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d rendering of a white and gray wall mounted shelf", "a black and white photograph of a birdhouse"], "question": "which object has a roof", "label": 1}, {"captions": [" a green electrical utility box with a warning sign and a small blue box on a concrete base.", " a white plastic box/tray with a hole in the middle."], "sample_ids": ["8e39d4766ed4444ea527d6c5ea33a5ef", "04f8bfad8ad14795aced8a83ea30ca60"], "properties": ["color, base, warning", "color is white, material is plastic, shape is box"], "captions_pred_pc": ["a black and white illustration of a box with dots on it", "a black and white image of a rectangular tray on a white background"], "captions_pred_image": ["a 3d model of an electrical box royalty-free 3d model preview no.2", "a 3d model of a white plastic tray"], "question": "which box is made of plastic", "label": 1}, {"captions": [" a vibrant city skyline featuring various colored buildings, trees, and skyscrapers.", " a wooden shed with a gray roof."], "sample_ids": 
["1a1fb9b0d83845f6b1238fb45e0defff", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["color, skyline, buildings", "roof, color, gray"], "captions_pred_pc": ["a black and white illustration of a city skyline", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white 3d model of a city skyline", "a 3d model of a shed with a gray roof"], "question": "which building is gray", "label": 1}, {"captions": [" a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["b16fb21cda9a4a21a024df749c2304f4", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["roof, ceiling, hole", "roof, trusses, beams"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a 3d model of a small house and a tree in the foreground", "a 3d model of the roof of a building"], "question": "which entity has a roof with trusses and wooden ceiling beams?", "label": 1}, {"captions": ["a pair of yellow pliers.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["570d29f10e5b428b91da27cff52bac56", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["color, yellow, pliers", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["of a pair of pliers on a white background vector illustration of a pair of pliers on a white background illustration", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a pair of scissors on a white background", "a 3d rendering of a plastic box with several compartments"], 
"question": "which object is made of plastic", "label": 1}, {"captions": [" a large, black and white circular building, resembling a stadium or ring structure.", " a white castle composed of small cubes."], "sample_ids": ["67f46bb0048244c687a58d1017a08f6b", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["building, color, black and white", "composed of, white, cubes"], "captions_pred_pc": ["the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustr", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a circular fence with black and white stripes", "a 3d model of a castle made of white cubes"], "question": "which building is composed of small cubes", 
"label": 1}, {"captions": [" of a red and white pokeball with the word \"pichu\" on it, compatible with various modeling and animation software.", " a small wooden cabinet with a drawer, doubling as a bedside table."], "sample_ids": ["7d76c44cb9dd4948b8766d83994ca5f3", "3755f3c19ae549c4bf708462db1b2581"], "properties": ["- material is stl, obj, fbx- size is 240px- color is red, white", "Cabinet, drawer, wood"], "captions_pred_pc": ["a black and white circular pattern on a white background", "a black and white image of a square with dots all over it"], "captions_pred_image": ["a close-up view of a black ball with a screw in the center", "a 3d model of a wooden box with a lid"], "question": "which entity is made of wood", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "bded33af34104b9686b845dfd18309a9"], "properties": ["resembles, toy, bookshelf", "table, staircase, light"], "captions_pred_pc": ["a black and white image of a book cover", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "a 3d model of a small table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a small house with a blue roof, a door, and a pool.", " a building featuring yellow columns, a yellow roof, and a wooden structure."], "sample_ids": ["40c52c2d278345c5b4e8d00a991271dc", "0ce6a4102f4f40e2a0084938b0a93941"], "properties": ["door, roof, pool", "structure, columns, roof"], "captions_pred_pc": ["of a black and white photo of a window with fringes", "a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a building with multiple levels"], "question": 
"which building has a wooden structure?", "label": 1}, {"captions": ["a featuring a graffiti-covered train, bench, wall, and skateboard.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["6de9fcac063d45df9424decdc215b379", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["graffiti, bench, wall", "camera, speaker, ceiling fan"], "captions_pred_pc": ["for a black and white image of a boat", "for a black and white image of an object on a white background"], "captions_pred_image": ["a graffiti-covered wall in a black and white photograph", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", " an old building with windows, doors, and a balcony on a street."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "706fb93f885d42f594e0ebbba632d2f2"], "properties": ["color, material, structure", "building, balcony, street"], "captions_pred_pc": ["a black and white drawing of a room with dots", "in 15 words or less a black ink brush stroke on a white background"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d model of an old building"], "question": "which building has a balcony", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " of a machine gun with additional items."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "d6e472d088b647c4bf07105b0fba3dba"], "properties": ["bed, desk, window", "gun, type, machine gun"], "captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white illustration of an airplane in the shape of the letter 'a'"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d model of a submachine gun"], "question": "which object is more likely to be used for hunting", 
"label": 1}, {"captions": [" a room featuring a wall with a painting, a hole, and a door.", " of a machine gun with additional items."], "sample_ids": ["1d1328346a464d2482463d6d5288e934", "d6e472d088b647c4bf07105b0fba3dba"], "properties": ["painting, door, wall", "gun, type, machine gun"], "captions_pred_pc": ["in one hundred words or less an illustration of an igloo on a white background stock illustration", "a black and white illustration of an airplane in the shape of the letter 'a'"], "captions_pred_image": ["a black and white photograph of a torn piece of paper in the shape of a bird", "a 3d model of a submachine gun"], "question": "which object is a type of gun", "label": 1}, {"captions": ["a low poly of a pink donut with eyes, mouth, and yellow frosting.", " tall grass, plants, rocks, and a tree."], "sample_ids": ["1271987b3fed464daad412ebce14d33e", "eefed882ed5f4711bc5a76332d9712f3"], "properties": ["color, mouth, eyes", "grass, plants, rocks"], "captions_pred_pc": ["of a silver bowl with black dots on the surface", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a white donut with eyes", "a 3d model of a group of trees"], "question": "which entity has more grass", "label": 1}, {"captions": ["a 3d pink spiky spherical flower.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["039a3fc74e39450883c46acbe2f57476", "a17477b445b3443189dad22f768b888b"], "properties": ["color, shape, texture", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a snowflake on a white background", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a pile of metal, torn and shredded paper, rocks, and a decayed animal.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": 
["c117f1923cad4ecf9df61b6e3d633374", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["a pile of metal, torn and shredded paper, rocks, and a decayed animal", "hat, candy, strawberry"], "captions_pred_pc": ["a black and white map of germany on a white background", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d model of a crumpled piece of paper on a white background", "a black and white image of a person wearing a hat"], "question": "which entity has a strawberry?", "label": 1}, {"captions": [" of a white rectangular shelf or light fixture on a gray background.", " a house with a blue roof."], "sample_ids": ["a84221f27ed0416280f8e67563f95ed7", "8ff693cd3ca74f8a901ca259b8b3a7ac"], "properties": ["background, color, white", "roof, color, blue"], "captions_pred_pc": ["a black line on a white background", "a black and white drawing of a cross on a white background royalty free illustration"], "captions_pred_image": ["a long white plastic strip on a gray background", "a 3d model of a house with a roof"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" a small house with a roof and ceiling-mounted air conditioner.", " a group of people standing together in a line."], "sample_ids": ["6965067ea9e34357a7af21a2f078fbac", "96c9da7f3c2d4f4b90485f899ec5d605"], "properties": ["roof, air conditioner, house", "group, people, line"], "captions_pred_pc": ["a black and white illustration of a window", "a group of people walking in a line on a white background"], "captions_pred_image": ["a 3d rendering of a small house with a covered porch", "a 3d model of a group of people standing next to each other"], "question": "which entity is a group of people standing together in a line?", "label": 1}, {"captions": [" a green skull and sphere.", " a small purple plastic chair with holes."], "sample_ids": ["4f4dc1300ab24b0a910da77a4d5e783f", "fe2bf0f8f5c64dd6bac3e2da0d1b89d0"], "properties": ["color, skull, sphere", "color, plastic, 
purple"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background dandelion illustration on a white background illustration", "this image may contain clothing apparel accessory purse bag and handbag"], "captions_pred_image": ["a 3d model of a skull on a gray background", "3d model of a chair royalty free 3d model preview no 3"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a 3d object featuring a rock, shell, piece of paper, and cat.", "a black and white of a knife/sword with a handle."], "sample_ids": ["53efab50e5a74e5ea165c763cea15be4", "c7692fa635e049bda0a2039fa5a784a4"], "properties": ["a, rock, paper", "image, color, black and white"], "captions_pred_pc": ["for a flock of birds in the sky", "of a black and white knife on a white background"], "captions_pred_image": ["a black and white image of a piece of paper in the shape of an island", "a black and white image of a knife"], "question": "which entity is a black and white image?", "label": 1}, {"captions": [" a medieval stone castle with walls and stairs.", " a small house with a staircase, balcony, and wooden floor."], "sample_ids": ["10bc1bbec4c045f9b15fc2156b5e32ee", "e67e211004cb450cbaf8139dd74ba39b"], "properties": ["wall, stairs, castle", "floor, staircase, balcony"], "captions_pred_pc": ["a castle in the snow a castle in the snow illustration on a white background royalty free illustration", "a black and white drawing of a wallet"], "captions_pred_image": ["a 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great 
wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the", "a 3d model of a bench on a wooden floor"], "question": "which entity has a staircase", "label": 1}, {"captions": [" of a white plastic tube with a hole and a chip on it.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["9968e06a62e8487ea33460e640abc573", "bf18bfd89efd43389781050230467d58"], "properties": ["color is white, material is plastic, shape is tube", "Lights, number, five"], "captions_pred_pc": ["a black and white image of a broom on a stand", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a white object on a gray background", "a white chandelier with five white shades"], "question": "which object is made of glass?", "label": 1}, {"captions": [" of a small wooden house with a roof.", " a metal building with a purple roof and cage structure."], "sample_ids": ["f5904a9d87ff4fa688146c18c1f27fec", "cbc10fb816034537b052e7c8fb75c4a6"], "properties": ["roof, house, wooden", "roof, purple, structure"], "captions_pred_pc": ["a black and white drawing of a house with dots", "for a black and white image of a bench"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a building with a metal roof"], "question": "which building has a cage structure", "label": 1}, {"captions": [" featuring a table with objects, a stone wall with a ball, and a teapot and vase on a tiled floor.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["654c49e07bb54e6b94637b5f7b65bf08", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["floor, table, wall", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a 
black and white illustration of a coffee cup, a spoon, and a knife on a black background", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a bench on a tiled floor royalty free 3d model preview no.3", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" of a meat skewer with a small piece of bread and a sausage on a stick.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["1728f2cb8eca4080af02b22262ff45d5", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["meat, bread, sausage", "roof, trusses, beams"], "captions_pred_pc": ["a black and white image of a brush on a white background", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["an image of a small white object on a gray background", "a 3d model of a roof structure"], "question": "which entity has more trusses", "label": 1}, {"captions": ["a white 3d printed object featuring a ring with multiple holes, a helmet with a hole, and a bowl with floral designs.", "a gold ring featuring various animal designs, including a swan, snake, bird, and dragon, accompanied by a gold-plated frog sculpture."], "sample_ids": ["62fb765ff1a64152b7721a2dd3a19736", "5e8319ec9a824ddcb3eef89658ef90f8"], "properties": ["ring, helmet, bowl", "gold, ring, animal"], "captions_pred_pc": ["a black and white image of a circle made up of tiny dots", "above a black and white image of a shark"], "captions_pred_image": ["a 3d printed ring in white 3d printed material", "a 3d sculpture of a bird with its wings outstretched"], "question": "which ring has animal designs", "label": 1}, {"captions": [" a large building featuring stairs, a clock tower, a balcony, and a roof.", " a small rocky island with a brown and white rug featuring a puddle on it."], "sample_ids": ["e7c78316f9cb4b8aad57a9c933f5278b", 
"4806b382466247ad9265fc8240a22d3d"], "properties": ["building, roof, balcony", "rocky, rug, puddle"], "captions_pred_pc": ["a black and white illustration of a group of dots in the shape of a square on a white background royalty free illustration", "a black and white image of a long, curved line on a white background"], "captions_pred_image": ["a 3d model of a building with a clock tower", "a 3d image of a rug on the ground, with a small puddle in the center royalty free 3d model preview no. 1"], "question": "which entity is not a building?", "label": 1}, {"captions": ["a featuring white and red cubes, and a pink and white chair.", " a house with a roof, wooden beams, and chimney."], "sample_ids": ["f2c44a82ba744ba8b93e9a1c2272c117", "be1376023c274bdda995d54f3694157f"], "properties": ["color, white, red, pink", "roof, beams, chimney"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white drawing of a bathroom with a shower"], "captions_pred_image": ["a 3d model of a white structure with stairs", "a 3d model of a house with a roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a modern office building with a green door, green roof, windows, and blue lights.", " a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole."], "sample_ids": ["d6087023095446fbadef1721478373b2", "b16fb21cda9a4a21a024df749c2304f4"], "properties": ["door, roof, window", "roof, ceiling, hole"], "captions_pred_pc": ["a black and white drawing of a toilet brush", "a black and white image of a square with dots on it"], "captions_pred_image": ["a 3d model of an apartment building", "a 3d model of a small house and a tree in the foreground"], "question": "which entity has a roof with a hole", "label": 1}, {"captions": ["a white teapot with pink flowers and a handle.", "a white teapot with pink flowers and a handle."], "sample_ids": ["f6c5e8931d164979a71914127c7e5438", 
"f6c5e8931d164979a71914127c7e5438"], "properties": ["color, white, handle", "color, white, handle"], "captions_pred_pc": ["a black and white dots on a white background", "a black and white dots on a white background"], "captions_pred_image": ["a white tea kettle with a black handle and floral design", "a white tea kettle with a black handle and floral design"], "question": "which teapot has a white handle?", "label": 0}, {"captions": ["a 3d object featuring a flat roof with blue and white pattern, a purple and white bench, a blue and purple striped runner, a bed with blue and purple stripes, blue and white striped paper, a bench with a blue and purple pattern, and a ceiling light fixture with wooden slats.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["28d0cf71ed684dcb87b2c9b2744c3633", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["runner, bed, bench", "roof, trusses, beams"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a creative black and white drawing of a snowflake on a white background royalty free illustration", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a 3d model of a long, curved structure", "a 3d model of the roof of a building"], "question": "which entity has a roof with trusses?", "label": 1}, {"captions": [" of a stone fountain with a lion statue, surrounded by an archway inspired by the arch of triumph.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["a72700696c3b44ef8101d1e71e914bc9", "b896a0898efe4059a776193c02132129"], "properties": ["lion, statue, fountain", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white image of a metal object", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d image of a lion statue on a wall", "a 3d model of 
an ancient statue"], "question": "which statue is made of stone", "label": 1}, {"captions": [" an ice cream machine, popsicle with two sticks, and a shelf with a computer monitor.", "a featuring a sailboat with a hook, a bird suspended in the air, and a pair of scissors."], "sample_ids": ["cb840159fea7436d81eb33bdccad3596", "f57ae66555d34349aeadc38b33f8f267"], "properties": ["A, a, a", "a, bird, hook"], "captions_pred_pc": ["a black and white illustration of a bench", "of a 3d scan of a person's torso and limbs"], "captions_pred_image": ["a 3d rendering of a white and gray wall mounted shelf", "a black and white photo of a kite flying in the sky"], "question": "which entity has a hook?", "label": 1}, {"captions": [" a house with a roof, roof truss, and suspended ceiling structure.", " a large steel and metal structure with a pool and scaffolding system."], "sample_ids": ["5abf69f79b92484fb54d41ff0c0a2c11", "5850d5c7223447db816081d50292fec0"], "properties": ["roof, truss, suspended ceiling", "structure, material, pool"], "captions_pred_pc": ["a suitcase made of dots on a white background a suitcase made of dots on a white background royalty free illustration", "a black and white drawing of a bridge with chains"], "captions_pred_image": ["a 3d model of a house with roof trusses", "a 3d model of a large concrete structure"], "question": "which structure is made of metal", "label": 1}, {"captions": [" a robotic warrior with a sword.", " of a small house featuring a flat, brown roof, table with two chairs and a stool, ceiling light, and a window."], "sample_ids": ["1c54afa26eb24e19b8660066718a9c5a", "3a509431d96b43f8a7aebe2846f08b96"], "properties": ["weapon, sword, robot", "roof, brown, flat"], "captions_pred_pc": ["a black and white image of a small white object on a black surface", "a black and white drawing of a snowflake on a white background a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a black 
and white image of a robot standing on a piece of paper", "a 3d rendering of a table and stool"], "question": "which entity has a brown roof", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "bded33af34104b9686b845dfd18309a9"], "properties": ["yellow, table, roof", "table, staircase, light"], "captions_pred_pc": ["a black and white drawing of a floor plan", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a small table with a staircase"], "question": "which table has a staircase?", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", " a small wooden house with a green roof."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["frame, roof, trusses", "roof, color, green"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a house with a ladder"], "question": "which house has a green roof", "label": 1}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", " a small wooden house with a green roof."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["color, red, blue, structure", "roof, color, green"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d model of a house with a ladder"], "question": 
"which structure has a green roof", "label": 1}, {"captions": [" of a house with a pink roof.", " of a house with a roof truss, chimney, and suspended ceiling."], "sample_ids": ["6162909df6294848a8eea83c3aa9585b", "9401dfc901b2447a9c0eb27da56854d7"], "properties": ["color, roof, pink", "roof truss, chimney, suspended ceiling"], "captions_pred_pc": ["a black and white drawing of the letter 'p' on a white background illustration", "in 15 words or less a black and white illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a"], "captions_pred_image": ["a 3d model of a house in the style of the 1920s and 1930s", "a 3d model of a house with a roof"], "question": "which house has a roof truss", "label": 1}, {"captions": [" a destroyed building and a damaged yellow-green machine.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["01406e7034fe4b7da32494c6cbf260f2", "a2354f13774340d392fbf33564934aab"], "properties": ["building, color, yellow-green", "building, roof, yellow"], 
"captions_pred_pc": ["a black and white photograph of a piece of paper", "a black and white image of a cell phone"], "captions_pred_image": ["a 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged", "a 3d rendering of a machine with a conveyor belt"], "question": "which building has a yellow roof", "label": 1}, {"captions": [" featuring a chair, table, and refrigerator.", " of a chessboard on a beige ceiling tile."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "7dccc542a68f470a947bf5f698c27bbd"], "properties": ["chair, table, refrigerator", "beige, tile, chessboard"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "a black and white image of a leopard print banner"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "3d chess board royalty free 3d model preview no 2"], "question": "which entity is a chessboard on?", "label": 1}, {"captions": ["a featuring a pile of food, leaves, shredded paper, and rocks with scattered broken paper pieces.", " tall grass, plants, rocks, and a tree."], "sample_ids": ["5206d4d96c2d428b9c1f7ee0e13bcffb", "eefed882ed5f4711bc5a76332d9712f3"], "properties": ["food, leaves, shredded paper, rocks", "grass, plants, rocks"], "captions_pred_pc": ["a black and white image of a bird in flight", "a black and white drawing of a tree"], "captions_pred_image": ["a 
3d model of a mountain range on a white surface", "a 3d model of a group of trees"], "question": "which entity has more rocks", "label": 1}, {"captions": ["a low poly of a tree with red apples and a green cactus with red dots.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["fd7765e391cd49ccbc72891d90850cdb", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["color, red, apples, tree, cactus, dots", "house, pool, balcony"], "captions_pred_pc": ["a black and white illustration of a snowflake on a white background", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a group of geometric shapes", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": ["pink and green pendant light hanging from a ceiling.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["1651a898288149edb8cbff0e1b2d692b", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["color, pink, green", "island, mountain, grass"], "captions_pred_pc": ["above a black and white photo of a small circular object on a white background", "a black and white map of the island of malta"], "captions_pred_image": ["a white pendant light hanging from the ceiling", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": [" a small house with a tree and a rock.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["9dc392a7f6e444e5bfb720684d6f864a", "bf18bfd89efd43389781050230467d58"], "properties": ["house, tree, rock", "Lights, number, five"], "captions_pred_pc": ["in 15 words or less the image depicts a white square on a white background with black dots all over the square stock illustration", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small house 
with a tree in front of it", "a white chandelier with five white shades"], "question": "which entity has more lights", "label": 1}, {"captions": [" of a round birthday cake with a single candle in the middle.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["76c834f15f664dbdb7c08ca1ff936e7c", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["candle, color, shape", "house, fence, playground"], "captions_pred_pc": ["a black and white illustration of a shower head", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a birthday cake with a candle on top royalty free 3d model", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": ["an orange of a forklift truck in a parking lot.", "s of a boat, bird, paper airplane, and kite flying in the air."], "sample_ids": ["32d757fbd29640ffb5aab34925525a29", "795cfa41d48a4cfc893ff1981318594d"], "properties": ["orange, forklift, parking lot", "s, boat, bird, airplane, kite"], "captions_pred_pc": ["a black and white illustration of a cell phone", "above a 3d illustration of a boy standing with his arms outstretched"], "captions_pred_image": ["a black and white photograph of a forklift", "a white kite flying in the air against a gray background"], "question": "which image shows a boat", "label": 1}, {"captions": [" of a destroyed building with a watercolor painting of a dilapidated house.", " a small building with windows and a roof."], "sample_ids": ["5a33f024faf145ac80cdadcdfef8a797", "0ef2cac27e364c0687afae7ab5040cc3"], "properties": ["image, building, painting", "roof, windows, building"], "captions_pred_pc": ["above a black and white drawing of a building", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a black and white photograph of a damaged house", "a 3d model of an apartment building 
royalty free 3d model preview no 3"], "question": "which building has a roof?", "label": 1}, {"captions": ["white 3d tank model", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], "sample_ids": ["31b2f632bf5e4128a5f59a7c9ddad62f", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["color is white, model is 3d, name is tank", "building, plane, room"], "captions_pred_pc": ["a close-up view of the back of the phone case", "a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a white 3d model of a tank on a gray background", "a 3d model of a box with a lot of items inside"], "question": "which entity has a room?", "label": 1}, {"captions": [" of a gray stereo system with a blue drawer, resembling a printer and computer with a blue screen, featuring a cd player.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["3d4d965e67744415b69ff6aaeb11f420", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["color, screen, drawer", "color is white, yellow, plastic"], "captions_pred_pc": ["above a black and white image of a brush", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a 3d model of a printer on a white background royalty free 3d model no.2", "a white plastic container with a label on it"], "question": "which entity is made of plastic", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " of a small wooden house with a roof."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "f5904a9d87ff4fa688146c18c1f27fec"], "properties": ["bed, desk, window", "roof, house, wooden"], "captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white drawing of a house with dots"], 
"captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d model of a small house"], "question": "which entity has a roof", "label": 1}, {"captions": [" a large, ancient stone building, resembling a roman structure and a medieval castle.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["f96abfafd34040a4bb09f5e2973403e9", "97e000ff41094665afd94ea565da8b13"], "properties": ["building, material, stone", "roof, material, wood"], "captions_pred_pc": ["a black and white drawing of the letter 'l' on a white background royalty free illustration", "a black and white drawing of a floor plan"], "captions_pred_image": ["3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosse", "a 3d model of the roof of a building"], "question": "which building is made of wood", "label": 1}, {"captions": ["a low poly of a pink donut with eyes, mouth, and yellow frosting.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["1271987b3fed464daad412ebce14d33e", "a17477b445b3443189dad22f768b888b"], "properties": ["color, mouth, eyes", "roof, pillar, stairs"], "captions_pred_pc": ["of a silver bowl with black dots on the surface", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a white donut with eyes", "a 3d model of a small building with a balcony"], "question": "which entity has a 
roof", "label": 1}, {"captions": [" of a green tent with a green cover and white awning.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["5168d23199604fa6b8fd982c2e2cf9e9", "b896a0898efe4059a776193c02132129"], "properties": ["color, awning, cover", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white illustration of a pyramid made of dots", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a skateboard ramp royalty free 3d model preview no.2", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a celtic stone cross.", " a rusted metal barrel with a yellow and red warning sign and stripe on it."], "sample_ids": ["95c9f6a3061c4850be22339959ac70d6", "5a49ad82ef7a4d33badea2261720f518"], "properties": ["- material is stone- shape is cross- color is black", "rusty, warning, metal"], "captions_pred_pc": ["for a black and white image of a watch", "a black and white drawing of dots on a white background a black and white drawing of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a celtic cross gravestone royalty free 3d model preview no.3", "a black and white photograph of a barrel"], "question": "which object is made of metal", "label": 1}, {"captions": ["a featuring a teddy bear on a rainbow, accompanied by a baby, a cat, a dog, and a drink, with a pink and green sign displaying \"close the sky over dreams.\"", " a tan, cylindrical hat with a hole in it."], "sample_ids": ["80dfbe37b3d74f11b712ca1ad6570f70", "00c350e9f5f24fcd8bcc378da2963d4c"], "properties": ["image, color, pink", "hat, color, tan"], "captions_pred_pc": ["above a black and white photograph of a dog in a bowl", "a black and white drawing of a mushroom on a white background royalty free illustration"], "captions_pred_image": ["a 3d sculpture of an 
animal on a piece of paper", "a 3d model of an object with a hole in it"], "question": "which entity is not a hat?", "label": 1}, {"captions": [" a small white barn with a metal roof.", " a house with wooden framing and trusses."], "sample_ids": ["4ca3342a96824684845f7d0e062ab176", "4501794e257c4a8ba60a94757d8e93a9"], "properties": ["roof, metal, white", "frame, trusses, wood"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a house made of dots", "a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a barn", "a 3d model of a house under construction"], "question": "which building has a wooden frame?", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", "white of a rhino head with horns."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "8481aade84de47cab1a9accf8067e678"], "properties": ["roof truss, insulation, suspended ceiling", "image, rhino, head"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "of a penguin skull in black and white"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "rhino head 3d model royalty free 3d model preview no 3"], "question": "which image shows a rhino head?", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["resembles, octopus, squid, spider, robot", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a 3d model of a tall, slender 
object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a small white table with stairs and a ladder, featuring a black and white kitchen hood and a black square ceiling light.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["5565c16f297e405f9d5dbf0ebb623605", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["table, stairs, ladder", "house, tree, hill"], "captions_pred_pc": ["above a black and white photograph of a small square in the center of the image", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d model of a table with a stool on top", "a 3d model of a house in the middle of a field"], "question": "which entity has a tree in front", "label": 1}, {"captions": ["a small blue plastic box with a lid and handle.", "white ceramic urn with a red lid."], "sample_ids": ["3e5cc957c888491ba1a5773299d3daa9", "ec15c810a38d4d45a36db910ecb2bcf8"], "properties": ["blue, lid, handle", "color, white, lid, red"], "captions_pred_pc": ["in 15 or fewer words a black and white illustration of a rectangular tray", "a black and white photograph of a vase"], "captions_pred_image": ["a 3d rendering of a gray plastic container", "a white ceramic vase sitting on top of a gray surface"], "question": "which object has a red lid", "label": 1}, {"captions": [" a dragon flying above a city, accompanied by a train, various objects, animals, and a person walking.", "a featuring a skeleton, torn paper, long stick, rock, and broken wood."], "sample_ids": ["f69264c33c324343b8a0a35d49ae0942", "46903bf029934b1989bc062dcb0a5531"], "properties": ["a city, train, dragon", "skeleton, torn, paper, long stick, rock, broken wood"], "captions_pred_pc": ["a black and white image of a toy car", "a close up of a black object on a white background"], "captions_pred_image": ["a line drawing of an airplane flying in the sky", "a 3d sculpture of a person's hand in the air royalty-free 3d model 
preview"], "question": "which entity is a still life?", "label": 1}, {"captions": [" a large house with a roof on a platform.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["cb3e09a301b746918a682a595037c7f7", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["roof, platform, house", "house, fence, playground"], "captions_pred_pc": ["a black and white image of a piece of paper", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a room with a lot of wires"], "question": "which house has a fence", "label": 1}, {"captions": ["a featuring a boat, table, chairs, umbrella, and solar panel.", "a featuring a building, a coin, a small black box, and a ball."], "sample_ids": ["0f0eb3a198d341d28f809b6d7634be8a", "949cf1a57aea45d18261e980b21b8c35"], "properties": ["boat, table, chairs, umbrella, solar panel", "a, building, coin, ball"], "captions_pred_pc": ["a black and white illustration of a boat with an umbrella", "a black and white illustration of a triangular shaped object"], "captions_pred_image": ["a 3d model of a boat, a table, chairs, and an umbrella", "a 3d model of a box with a coin next to it"], "question": "which entity has a coin?", "label": 1}, {"captions": [" a room featuring a wall with a painting, a hole, and a door.", " a room featuring a wall with a painting, a hole, and a door."], "sample_ids": ["1d1328346a464d2482463d6d5288e934", "1d1328346a464d2482463d6d5288e934"], "properties": ["painting, door, wall", "painting, door, wall"], "captions_pred_pc": ["in one hundred words or less an illustration of an igloo on a white background stock illustration", "in one hundred words or less an illustration of an igloo on a white background stock illustration"], "captions_pred_image": ["a black and white photograph of a torn piece of paper in the shape of a bird", "a black and white photograph 
of a torn piece of paper in the shape of a bird"], "question": "which entity has a painting", "label": 0}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", " a small house with a roof."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "0d2246e433ce4066b76489f17ba8d694"], "properties": ["room, bed, desk", "roof, house, small"], "captions_pred_pc": ["a black and white drawing of a door", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a 3d model of a house with a triangular roof"], "question": "which house has a roof", "label": 1}, {"captions": [" a small house with a roof and ceiling-mounted air conditioner.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["6965067ea9e34357a7af21a2f078fbac", "a2354f13774340d392fbf33564934aab"], "properties": ["roof, air conditioner, house", "building, roof, yellow"], "captions_pred_pc": ["a black and white illustration of a window", "a black and white image of a cell phone"], "captions_pred_image": ["a 3d rendering of a small house with a covered porch", "a 3d rendering of a machine with a conveyor belt"], "question": "which building has a roof", "label": 1}, {"captions": ["a 3d object featuring a rock, shell, piece of paper, and cat.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["53efab50e5a74e5ea165c763cea15be4", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["a, rock, paper", "island, mountain, grass"], "captions_pred_pc": ["for a flock of birds in the sky", "a black and white map of the island of malta"], "captions_pred_image": ["a black and white image of a piece of paper in the shape of an island", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": [" various fruits and vegetables.", " a small house with 
a staircase, balcony, and wooden floor."], "sample_ids": ["885fa3a9c60e4279ba7eb0d29c64ae3d", "e67e211004cb450cbaf8139dd74ba39b"], "properties": ["color, shape, size", "floor, staircase, balcony"], "captions_pred_pc": ["a black and white image of a sponge-like object", "a black and white drawing of a wallet"], "captions_pred_image": ["a collection of various fruits and vegetables arranged on a table", "a 3d model of a bench on a wooden floor"], "question": "which entity has a floor", "label": 1}, {"captions": [" of a house featuring a wooden roof structure with trusses and beams.", " a building with a metal and wooden pole structure."], "sample_ids": ["8cd3f5ff0fc041eca9a480faa6739480", "e2e2ab4474b84f33809979da457eedd9"], "properties": ["roof, trusses, beams", "structure, material, pole"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a window", "a black and white illustration of a line of dots on a white background"], "captions_pred_image": ["a 3d model of a roof structure", "a 3d model of a structure with multiple tables and chairs"], "question": "which structure is made of metal and wooden poles", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " a wooden staircase with a red and white railing and a red arrow."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "bb8bb4c9972d4b718b8bbd3ed5fdd14d"], "properties": ["a box, a cup, a bottle, a jar", "arrow, red, white"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "above a black and white image of a square with a white cross in the center"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d model of a spiral staircase"], "question": "which object has a red arrow?", "label": 1}, {"captions": ["a featuring a red hat, floating cup, bowl filled with candy, and a strawberry.", " a white, curved plastic object resembling an ear, mouth guard, or 
hat."], "sample_ids": ["e27a9fd533dc41da9cf2eeb8fee2a5af", "c3a82df41875402285608ef13a55df57"], "properties": ["hat, candy, strawberry", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white illustration of two spheres", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a black and white image of a person wearing a hat", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a red and black drone with wheels and propellers.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["1cd3d0db9ec744549124443e6dd0a9f8", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["color, wheel, propeller", "camera, speaker, ceiling fan"], "captions_pred_pc": ["above a black and white image of a toy car on a white background", "for a black and white image of an object on a white background"], "captions_pred_image": ["the image is displayed at 1000x1000 pixels", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a camera", "label": 1}, {"captions": [" a pig with an open mouth and a knife.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["d2d3b56c704341198aebd3601a6bb624", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["mouth, open, knife", "door, lock, handle"], "captions_pred_pc": ["a black and white photo of a hippopotamus", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of a pig with a knife in its mouth royalty free 3d model preview no.2", "a black and white image of a door with a crack in it"], "question": "which entity has a lock", "label": 1}, {"captions": [" a bed featuring a blue mattress, blue and yellow pillows, and cushions.", " a house with a roof, wooden beams, and chimney."], "sample_ids": ["f7a59020e0764707b4aa354ed6e574cb", 
"be1376023c274bdda995d54f3694157f"], "properties": ["color, mattress, pillows", "roof, beams, chimney"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a television", "a black and white drawing of a bathroom with a shower"], "captions_pred_image": ["a 3d model of the bed royalty free 3d model no.2", "a 3d model of a house with a roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", " a house with a yard, trees, bushes, and surrounding buildings."], "sample_ids": ["7907916e8f524df5b16eb566497db83e", "7f8942ef51dd4246993a587a12df168c"], "properties": ["color, roof, tray", "house, yard, surrounding buildings"], "captions_pred_pc": ["a black and white image of a metal object", "a black and white image of a truck on a white background"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of a house in the middle of a field"], "question": "which house has a yard", "label": 1}, {"captions": ["a featuring a tree stump, mossy wood, leaves, and a rock with grass.", " of a yellow metal locker with legs, wheels, and metal brackets."], "sample_ids": ["2527cd763a1a43f9870eb65e44e79f7d", "e3fde8fe782c41f0b141c9f1b8e13aa5"], "properties": ["mossy, rock, grass", "metal, legs, wheels"], "captions_pred_pc": ["a black and white image of a person on a skateboard", "a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white 
illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white"], "captions_pred_image": ["a 3d model of a piece of wood on a white background", "a 3d model of an old metal locker"], "question": "which object has legs", "label": 1}, {"captions": ["a small yellow and black helicopter flying in the air.", " a small black house with a green roof, resembling a shed or container."], "sample_ids": ["7qxP6dQ5nNuaG8d0vswXXKnd0vq", "bdb8e4c36ccb477890fd6ae569ae305c"], "properties": ["color, yellow, black", "black, roof, green"], "captions_pred_pc": ["a black and white illustration of an airplane", "a black and white drawing of a square with dots all over it"], "captions_pred_image": ["a black and white helicopter flying in the air", "a 3d model of a small black building"], "question": "which entity has a green roof", "label": 1}, {"captions": [" a flying bird, resembling a crow and a pigeon.", " of a wrecked plane, ship, and bird on a pile of rocks with grass."], "sample_ids": ["5ec78c8b6ab54f739adb0b46d216a454", "b0c703df20154bbf9fd8707c61137fc5"], "properties": ["bird, resembles, crow, pigeon", "plane, ship, bird"], "captions_pred_pc": ["above a black and white illustration of an airplane on a white background", "a black and white watercolor map of the state of ohio"], "captions_pred_image": ["a black and white image of a bird in flight", "a black and white photograph of a pile of debris on the 
ground"], "question": "which entity is a wrecked plane", "label": 1}, {"captions": [" featuring a pink and white dress, a purple and white vase, a box, and a coffee cup with a purple flower.", " a wooden staircase with a railing and table."], "sample_ids": ["ec2de6c604e44e6782ffab0c46daf33b", "956247bea850458199c651037d4b1d7f"], "properties": ["a, dress, flower", "railing, table, staircase"], "captions_pred_pc": ["a black and white drawing of a tea bag in the shape of a flower on a white background a black and white drawing of a tea bag in the shape of a flower on a white background royalty free illustration", "above a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a vase and a book next to each other", "a 3d model of a table with a staircase"], "question": "which entity has a table?", "label": 1}, {"captions": [" a metal building with a purple roof and cage structure.", " a house with a roof and beams."], "sample_ids": ["cbc10fb816034537b052e7c8fb75c4a6", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["roof, purple, structure", "roof, beams, house"], "captions_pred_pc": ["for a black and white image of a bench", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a building with a metal roof", "a 3d model of a building with a roof"], "question": "which entity has a roof with beams", "label": 1}, {"captions": [" a wall-mounted wooden panel with brown, black, gold, and white stripes.", " a white castle composed of small cubes."], "sample_ids": ["2218f0aa140f4f2f87abaaabe3de5516", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["color, material, pattern", "composed of, white, cubes"], "captions_pred_pc": ["a black and white illustration of an ice cream cone", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a black and white image of a shelf with multiple shelves", "a 3d model of a castle made of white cubes"], 
"question": "which entity is composed of small cubes", "label": 1}, {"captions": ["a featuring a boat, table, chairs, umbrella, and solar panel.", " a clay pot with holes in it."], "sample_ids": ["0f0eb3a198d341d28f809b6d7634be8a", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["boat, table, chairs, umbrella, solar panel", "hole, material, clay"], "captions_pred_pc": ["a black and white illustration of a boat with an umbrella", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a boat, a table, chairs, and an umbrella", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": ["white space shuttle model.", "a black and white of a knife/sword with a handle."], "sample_ids": ["c5f8ada1dab549c3a1c77997dcea0ca2", "c7692fa635e049bda0a2039fa5a784a4"], "properties": ["color is white, model is space shuttle, size is small", "image, color, black and white"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "of a black and white knife on a white background"], "captions_pred_image": ["a white 3d model of a space shuttle on a gray background", "a black and white image of a knife"], "question": "which image is black and white", "label": 1}, {"captions": [" of a plague mask with a rusty, horned, wooden helmet and a crow's head design.", " a gray, metal pillar/cylinder."], "sample_ids": ["2b0896f810074399a5ae7d6dbab8c330", "11391e6bab574dc0be8f2440fbc3b724"], "properties": ["- material is wood, rusty, horned", "color is gray, material is metal, shape is cylinder"], "captions_pred_pc": ["in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration", "of a black candle on a white background"], "captions_pred_image": ["3d 
model of a plague doctor's mask", "a 3d model of a candlestick"], "question": "which object is made of metal", "label": 1}, {"captions": [" a house featuring a roof, floor plan, heating system, and ceiling light fixture.", " a potted christmas pine tree."], "sample_ids": ["269939560e08432f9e134309b9a1c587", "460c8f3034a844159826fac3b8aa35a5"], "properties": ["floor plan, heating system, ceiling light fixture", "a, color, green"], "captions_pred_pc": ["a black and white drawing of a house", "a black and white illustration of a snowflake on a white background"], "captions_pred_image": ["a 3d model of a building with a glass facade", "a 3d model of a christmas tree in a vase"], "question": "which object is green", "label": 1}, {"captions": [" a building with a roof structure, featuring a wooden truss and ceiling with a light.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], "sample_ids": ["cb42ecb7a3fd4eba99f166150ecbc9a7", "d81d13362ae04371bb2cba46e4939665"], "properties": ["roof structure, truss, ceiling", "hat, bow, arrow"], "captions_pred_pc": ["a black and white image of a stainless steel sculpture", "above a black and white photo of an ice sculpture"], "captions_pred_image": ["a 3d model of a barn royalty free 3d model preview no 2", "a sculpture of an african man sitting on a pedestal"], "question": "which entity has a wooden base?", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " a concrete wall with peeling paint and rusted metal features."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "4376831ff557462dbacc4cce88a8cc86"], "properties": ["bed, desk, window", "paint, rust, concrete"], "captions_pred_pc": ["of a black and white drawing of a curved line", "above a black and white image of a shelf on a white background"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d model of a concrete wall"], "question": "which entity is a wall?", 
"label": 1}, {"captions": [" an old castle in a grassy field.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["26ea562f32d54afe919b73486dbf7d53", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["field, grass, castle", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["above a black and white image of a castle in the middle of a field", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a black and white image of a broken column on a piece of paper", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": [" a small building with windows and a roof.", " a spider-like creature with long arms and legs."], "sample_ids": ["0ef2cac27e364c0687afae7ab5040cc3", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["roof, windows, building", "arachnid, leg, arm"], "captions_pred_pc": ["a black and white square made up of small dots on a white background", "a black and white illustration of a spider on a white background"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no 3", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": ["a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["ef8288c9fdfc4e0f9c1fe25d570a104e", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color is white, yellow, plastic", "a, material, clay"], "captions_pred_pc": ["a black and white image of a metal bowl with dots", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a white plastic container with a label on it", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" a small wooden building with a roof, resembling a 
birdhouse or cabinet.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["a60e4d5f34aa4a6280343a8f15bb1c13", "06a1c233fb444830b577aa06e2c01294"], "properties": ["house, roof, wooden", "house, tree, hill"], "captions_pred_pc": ["for a black and white image of an object on a white background", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a black and white photograph of a birdhouse", "a black and white image of a house in the middle of a field"], "question": "which house is on a hill?", "label": 1}, {"captions": ["a featuring a bench with flowers, a bridge, a white sculpture, a bicycle with a flower, a giraffe, and a palm tree.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["12093c89a60941e7884b252bdc05104c", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["giraffe, bench, flower", "Wheels, laptop, robot"], "captions_pred_pc": ["a black and white drawing of a gear on a white background", "a black and white drawing of a cell phone"], "captions_pred_image": ["a 3d model of a sculpture made of sticks", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" a multicolored rock with various green, brown, and other hues, featuring a hole.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["04be3554cef349f2bc631b7f30898228", "c3a82df41875402285608ef13a55df57"], "properties": ["color, shape, material", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a map of the country in black ink on a white background royalty free illustration", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a rock on a white background royalty free 3d model preview no 1", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, 
{"captions": ["a blue and white radio in the shape of a small suitcase.", " a spiked baseball bat and a spiked hammer."], "sample_ids": ["314cb57bed324d268c1205f5c7bf80ab", "6ce3aa0ecf76431e9e50256a57c92330"], "properties": ["color, shape, material", "spiked, baseball bat, hammer"], "captions_pred_pc": ["of a black and white drawing of a purse", "for a black star on a white background"], "captions_pred_image": ["a 3d model of an old-fashioned radio on a white background royalty-free 3d model preview no.2", "a 3d model of a baseball bat"], "question": "which object is made of metal", "label": 1}, {"captions": ["a white of a spaceship and building.", " a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond."], "sample_ids": ["bf7d4277c9184d35abdec85bd5e25956", "a452d5381dad4dc09f5ebe10635ae5fe"], "properties": ["image, building, spaceship", "house, roof, green"], "captions_pred_pc": ["a black and white drawing of a tree", "above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image 
of a black and white image of a black and white image of a black and white image of a black and white"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a 3d model of a building with a black roof"], "question": "which building is greener", "label": 1}, {"captions": [" a black metal shelf with four holes and a laptop on it.", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["b3b6f91d939d4193a0090eaabd39eb47", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["black, laptop, shelf", "box, paper clip, lock"], "captions_pred_pc": ["a close up of a black and white tile on a white background", "above a black and white image of a clock in the shape of a spiral"], "captions_pred_image": ["a 3d rendering of a black metal shelf", "a 3d model of a stapler with a staple in it royalty free 3d model no."], "question": "which object has a lock?", "label": 1}, {"captions": ["a wooden-handled axe .", " a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole."], "sample_ids": ["439ad3cdddbc4211b8a3c98448e900cc", "b16fb21cda9a4a21a024df749c2304f4"], "properties": ["handle material is wood, head material is metal, overall length is long", "roof, ceiling, hole"], "captions_pred_pc": ["a black and white image of a hammer on a white background", "a black and white image of a square with dots on it"], "captions_pred_image": ["3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d 
model of an axe", "a 3d model of a small house and a tree in the foreground"], "question": "which object has a hole in the ceiling", "label": 1}, {"captions": [" a snow-covered mountain with blue and white stripes.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["a95e4948175142f39e7d157f801c60c3", "97e000ff41094665afd94ea565da8b13"], "properties": ["color, shape, texture", "roof, material, wood"], "captions_pred_pc": ["above a black and white image of a spiral in the sky", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a mountain range in the desert royalty-free 3d model preview no. 1", "a 3d model of the roof of a building"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a 3d white object featuring stacked racks, toy train, blocks, plastic pipes, lego pieces, and clothes hangers.", " a wooden shed with a gray roof."], "sample_ids": ["c9c786f133f54d5e8d99bfc1a588df41", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["stacked, racks, toy train", "roof, color, gray"], "captions_pred_pc": ["a black and white photo of a person standing in front of a white wall", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a ship's propeller on a white background", "a 3d model of a shed with a gray roof"], "question": "which object has a gray roof", "label": 1}, {"captions": ["s of a cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, and cow.", " a wooden roof structure with a pink roof."], "sample_ids": ["7adf9de5fb734455a3a3a7f084e3d628", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, cow", "roof, color, pink"], "captions_pred_pc": ["a black and white 
image of a flying saucer", "above a black and white image of a metal grate"], "captions_pred_image": ["a black and white image of a fighter plane flying upside down", "a 3d model of the roof of a building"], "question": "which entity has a roof that is pink?", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["color, white, black, white", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a multicolored cube representing a protein, featuring pink, yellow, red, and green hues.", "a low-poly of a white, shattered sphere."], "sample_ids": ["ee7c3113f2754f9cbe8980b1b7cc4eff", "94119660e7054fc5b7baa68a4e39968c"], "properties": ["color, shape, color", "sphere, color, texture"], "captions_pred_pc": ["a black and white pattern of small dots on a white background a black and white pattern of small dots on a white background illustration", "a black and white illustration of a dandelion on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a 
dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white"], "captions_pred_image": ["a 3d model of a piece of fabric", "a 3d model of a cracked egg"], "question": "which entity is a sphere?", "label": 1}, {"captions": [" a castle on a hill in the middle of a field.", " a toy motorcycle, car, and robot on an orange platform."], "sample_ids": ["5c6965cc9640450d91ba7d788d4e01fe", "7407a108e0354925b83b750339bc03df"], "properties": ["hill, field, castle", "platform, color, orange"], "captions_pred_pc": ["a black and white photograph of a snowflake on a white background", "a black and white illustration of a bicycle"], "captions_pred_image": ["a 3d model of a castle on a hill", "a 3d model of a motorcycle on a pedestal"], "question": "which object is on a platform?", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["- color is red, blue, pink", "table, staircase, light"], "captions_pred_pc": ["for a black and white image of an object on a white background", "for a black and white image of a 
small square on a white background"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a 3d model of a table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": ["a featuring a sailboat with a hook, a bird suspended in the air, and a pair of scissors.", "three white plastic containers with lids, including a box, a cylinder, and a bottle."], "sample_ids": ["f57ae66555d34349aeadc38b33f8f267", "67e8933750254cd8afddbf4865ae9e39"], "properties": ["a, bird, hook", "box, cylinder, bottle"], "captions_pred_pc": ["of a 3d scan of a person's torso and limbs", "a black and white dots pattern on a white background"], "captions_pred_image": ["a black and white photo of a kite flying in the sky", "a 3d model of a plastic bottle, a plastic container, and a plastic lid"], "question": "which entity has a box", "label": 1}, {"captions": [" a stone arrowhead with blue crystals and ice-like features.", " of a white rock-like object, possibly a shell or ice."], "sample_ids": ["5f8c7eda0f464019a4acea243114555d", "096e42b466ec438d95c5d89a85191534"], "properties": ["- material is stone - color is blue - shape is arrowhead", "white, rock, shell"], "captions_pred_pc": ["above a black and white drawing of an arrow", "in one hundred words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words"], "captions_pred_image": ["a 3d model of a large 
piece of ice on a white background royalty free 3d model preview no 3", "a 3d model of a white rock on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a white building with a square ceiling panel and a white 3d printed plane on top.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["eb3ea0e6963f4efda2a8cf0732befd56", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["- material is 3d printed, ceiling panel is square, plane is white", "roof, color, yellow"], "captions_pred_pc": ["above a black and white drawing of a cross", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building on a white surface", "a 3d model of a house with a roof"], "question": "which building has a roof that is yellow?", "label": 1}, {"captions": ["s of a skateboard, snowboard, door, and three pairs of shoes, along with a paper mask and a paper with a hole.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["a267d906e4cf4d36bfe841c6cc9e698b", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["s of, snowboard, shoes, mask", "roof, color, yellow"], "captions_pred_pc": ["above a black and white image of a person standing on a piece of paper", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a surfboard flying through the air", "a 3d model of a house with a roof"], "question": "which entity has a roof that is the color of yellow?", "label": 1}, {"captions": [" a pair of fur-trimmed boxing gloves and a human heart, with a man's hand wearing a 
hat.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["661c85f7cea14b7c81fb30b31d603cab", "3cd410c4359a4cef98702963a2b9802b"], "properties": ["boxing gloves, heart, hand", "roof, trusses, ladder"], "captions_pred_pc": ["a black and white image of a sponge in the shape of the letter 'v'", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a 3d model of the human heart", "a 3d model of the roof of a building"], "question": "which entity has a ladder?", "label": 1}, {"captions": [" a small house with stairs and a balcony.", " a small house with a roof."], "sample_ids": ["0fbc5f16d301450c820b1f2158fd4f69", "0d2246e433ce4066b76489f17ba8d694"], "properties": ["balcony, stairs, house", "roof, house, small"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a 3d model of a building with two floors and a balcony", "a 3d model of a house with a triangular roof"], "question": "which house has a roof", "label": 1}, {"captions": ["a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box.", "a featuring a flying plane, a destroyed plane, a large airborne ship, a building with a broken roof, and a broken piece of metal."], "sample_ids": ["7e2b63ba4ce24cecacea67dd052016c1", "4839e3b998ff4f6a84de50488ffae3ba"], "properties": ["building, plane, room", "a, building, roof"], "captions_pred_pc": ["a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration", "for a black and white drawing of a person holding a pencil"], "captions_pred_image": ["a 3d model of a box with a lot of items inside", "a 3d model of the space shuttle"], "question": "which building has a roof", "label": 1}, {"captions": [" a small house 
with a blue roof, a door, and a pool.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["40c52c2d278345c5b4e8d00a991271dc", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["door, roof, pool", "island, mountain, grass"], "captions_pred_pc": ["of a black and white photo of a window with fringes", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d model of a small house", "a 3d image of a small island in the middle of a lake"], "question": "which entity has a mountain?", "label": 1}, {"captions": [" a small bedroom with wooden floors, walls, roof, and shelf.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], "sample_ids": ["e602ac60041f4b4f84c044161e478781", "d81d13362ae04371bb2cba46e4939665"], "properties": ["floor, wall, roof", "hat, bow, arrow"], "captions_pred_pc": ["above a black and white image of a decorative metal bar", "above a black and white photo of an ice sculpture"], "captions_pred_image": ["a 3d model of a room with wooden walls and a rug on the floor", "a sculpture of an african man sitting on a pedestal"], "question": "which entity has a wooden base?", "label": 1}, {"captions": ["a featuring a fireplace, wooden bench, sled, log, castle, and cat.", " a concrete wall with peeling paint and rusted metal features."], "sample_ids": ["fe4cd6c3972940a5b7854ca1ebc8a5a3", "4376831ff557462dbacc4cce88a8cc86"], "properties": ["fireplace, bench, log", "paint, rust, concrete"], "captions_pred_pc": ["a black and white illustration of a snowflake", "above a black and white image of a shelf on a white background"], "captions_pred_image": ["a 3d model of a fireplace with snow on the ground", "a 3d model of a concrete wall"], "question": "which entity is made of concrete", "label": 1}, {"captions": ["a small white bowl with a light blue glaze and blue rim.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], 
"sample_ids": ["6f7201fbb58649379398a8d1d5c0cc7a", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["color, blue, rim, blue", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["a black and white drawing of a dotted circle on a white background", "a black and white image of a circular object on a white background"], "captions_pred_image": ["a white bowl on a gray background", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a house with a roof and a room featuring a pink ceiling.", "a white of a small archway gate."], "sample_ids": ["9c9f4e7f7c9442df99a9dc41870083c5", "a48aae47988c4daa9531e33b1e3085f8"], "properties": ["roof, room, ceiling", "image, color, white"], "captions_pred_pc": ["a black and white drawing of the letter 'l' on a white background", "above a black and white illustration of an arch"], "captions_pred_image": ["a 3d model of a building with a roof and walls", "a 3d model of a white arch"], "question": "which image is white", "label": 1}, {"captions": ["a 3d cartoon cat model wearing a blue outfit.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["0d8373961c804794a74971b946ebad8d", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, outfit, cat", "a, material, clay"], "captions_pred_pc": ["a 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a 
shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a shell on a white background 3d model of a", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a cat wearing a jacket", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" of a japanese-style pagoda house in a pixelated village.", " of a black billboard with a blank screen on a metal stand."], "sample_ids": ["42c4e6ca4a0c4b7b9a97d543b2442222", "0c809d7bc6ff40b39d1d81bbb5f1b25e"], "properties": ["image is a japanese-style pagoda house in a pixelated village", "black, screen, blank"], "captions_pred_pc": ["a black and white drawing of a square on a white background stock illustration \u00a9 2019 iStock", "of a 3d illustration of a smartphone with a charging cable"], "captions_pred_image": ["a 3d model of a japanese temple and pagoda", "3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard 3d model of a billboard"], "question": "which entity is a billboard?", "label": 1}, {"captions": [" of a pile of metal, torn 
and shredded paper, rocks, and a decayed animal.", " a small white house with a roof."], "sample_ids": ["c117f1923cad4ecf9df61b6e3d633374", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["a pile of metal, torn and shredded paper, rocks, and a decayed animal", "roof, color, white"], "captions_pred_pc": ["a black and white map of germany on a white background", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a crumpled piece of paper on a white background", "a 3d model of a building with a white roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a building with a purple roof, featuring a ceiling with wooden beams.", " a metal building with a purple roof and cage structure."], "sample_ids": ["b0cb5cd2fdca4bd5bdf96dd5c0cc13b5", "cbc10fb816034537b052e7c8fb75c4a6"], "properties": ["roof, purple, beams", "roof, purple, structure"], "captions_pred_pc": ["the letter l made of snowflakes on a white background royalty free illustration", "for a black and white image of a bench"], "captions_pred_image": ["a 3d model of the roof structure royalty free 3d model no.", "a 3d model of a building with a metal roof"], "question": "which building has a purple roof", "label": 1}, {"captions": [" a house with a pink roof, brick walls, and insulated ceiling.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["c8936ace72954650b4e2d84246964849", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["roof, color, pink", "roof, structure, greenhouse"], "captions_pred_pc": ["a black and white drawing of a toilet", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a building with a roof"], "question": "which roof is made of a greenhouse", "label": 1}, {"captions": [" a black building.", " a large steel 
building with a pool."], "sample_ids": ["88702656e9684e1ea1a01dc7075c00e0", "72eed67d8d884c819b28e2e95eb48f06"], "properties": ["color, black, building", "building material, pool, steel"], "captions_pred_pc": ["a close up of a black and white rug on a white background", "a black and white illustration of a building with a tree growing out of it"], "captions_pred_image": ["a black 3d model of a house on top of a blueprint", "a 3d model of a concrete structure"], "question": "which building is made of steel", "label": 1}, {"captions": [" of a wooden tv stand with two drawers and handles.", " a large orange keg with a white lid."], "sample_ids": ["6409380e790442e6a5733eb447d4c510", "cf24eea70b4f4067b36583924a82cc35"], "properties": ["Drawer, Handle, Wood", "color, lid, orange"], "captions_pred_pc": ["a black and white drawing of a line of dots on a white background", "a black and white circular pattern of dots on a white background a black and white circular pattern of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a wooden entertainment center", "a gray keg with a white lid sits on top of a gray surface"], "question": "which object is made of wood", "label": 0}, {"captions": ["a small white wooden castle with flags, turrets, and two towers.", " a concrete wall with peeling paint and rusted metal features."], "sample_ids": ["345b1c7d2d9b4524aa0dcdc3cd27e4da", "4376831ff557462dbacc4cce88a8cc86"], "properties": ["turrets, flags, towers", "paint, rust, concrete"], "captions_pred_pc": ["a black and white illustration of a square made up of many small dots on a white background", "above a black and white image of a shelf on a white background"], "captions_pred_image": ["a 3d model of a castle with towers and a drawbridge royalty-free 3d model preview no.1", "a 3d model of a concrete wall"], "question": "which entity is made of concrete", "label": 1}, {"captions": [" a small house on an island with trees, shrubs, a pool, 
and a lake.", " a wooden roof structure with a pink roof."], "sample_ids": ["c8331489fca44685bedfa1bdadf6ccb3", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["house, lake, pool", "roof, color, pink"], "captions_pred_pc": ["a black and white image of a pattern on a piece of paper", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of a large building", "a 3d model of the roof of a building"], "question": "which structure has a pink roof", "label": 1}, {"captions": [" a small white box with a green light.", " of a white spiral, earbuds, ceiling light, and silver ring with black and white scissors and design."], "sample_ids": ["a5e37b9c782c4340b4dea45fbe1c701a", "c69f60b389124ad9b4f81c64ec332054"], "properties": ["color, white, light", "earbuds, light, ring"], "captions_pred_pc": ["a 3d sculpture of a vase made of small black dots on a white background 3d sculpture of a vase made of small black dots on a white background royalty free illustration", "a black and white drawing of a needle and thread"], "captions_pred_image": ["a 3d rendering of a small white box", "a black and white illustration of a pair of sunglasses and a pair of scissors next to each other on a white background"], "question": "which entity has a light", "label": 1}, {"captions": ["a featuring a phone booth, desk, chair, lamp, ladder, and two additional chairs.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["7da804ad2b554c9a9915d775afb015d3", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["desk, chair, lamp", "island, terrain, water"], "captions_pred_pc": ["a black and white illustration of a city skyline", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d rendering of a desk and chair in a room", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a small house with a pond and situated on a 
rock.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["92859eb82a344134806b37cc209927c6", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["house, rock, pond", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a toaster", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a small house with stairs and a roof.", " a small black house with a green roof, resembling a shed or container."], "sample_ids": ["e9305c80010f4e3b9de9789f01a9bee5", "bdb8e4c36ccb477890fd6ae569ae305c"], "properties": ["roof, stairs, house", "black, roof, green"], "captions_pred_pc": ["above a black and white image of a square with dots all over it", "a black and white drawing of a square with dots all over it"], "captions_pred_image": ["a 3d rendering of a podium on a wooden floor", "a 3d model of a small black building"], "question": "which house has a green roof", "label": 1}, {"captions": ["a white of a building with columns, stairs, and railings.", "a featuring a bench with flowers, a bridge, a white sculpture, a bicycle with a flower, a giraffe, and a palm tree."], "sample_ids": ["c9ad30f336844b629cb237fa5b0d94f2", "12093c89a60941e7884b252bdc05104c"], "properties": ["image, building, stairs", "giraffe, bench, flower"], "captions_pred_pc": ["a black and white image of dots on a white background a black and white image of dots on a white background royalty free illustration", "a black and white drawing of a gear on a white background"], "captions_pred_image": ["a 3d model of a multi-level building with stairs and balconies royalty-free 3d model no.", "a 3d model of a sculpture made of sticks"], "question": "which image has a bench with flowers?", "label": 
1}, {"captions": [" of a cup, bottle, and two metal buckets on a chessboard with a square ceiling light fixture.", "s of a cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, and cow."], "sample_ids": ["05b5a5da1a0a4c1fa60a9e5edd5c3424", "7adf9de5fb734455a3a3a7f084e3d628"], "properties": ["cup, bottle, chessboard", "cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, cow"], "captions_pred_pc": ["a black and white 3d shape made up of small dots on a white background", "a black and white image of a flying saucer"], "captions_pred_image": ["a 3d model of a chess set on a checkered board royalty-free 3d model", "a black and white image of a fighter plane flying upside down"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": ["a small green and white 3d teapot in the shape of a turtle.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["8df8b28138e040a89303e91518b09d59", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["shape is turtle, color is green, white", "a, material, clay"], "captions_pred_pc": ["above a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a 
black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a white ceramic teapot on a gray background", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["roof, green, lawn", "Wheels, laptop, robot"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "a black and white drawing of a cell phone"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["base material is wood, color is red, message is welcome to northwich", "throne, stairs, tree"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a throne with a tree on it"], "question": "which entity has a fireplace?", "label": 
1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " of a tree stump and rock with flowers on them."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "3f74af45aeeb43ee95e2c8a5e3afeae6"], "properties": ["yellow, table, roof", "flower, rock, tree stump"], "captions_pred_pc": ["a black and white drawing of a floor plan", "above a black and white drawing of a flower on a white background"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree st"], "question": "which entity has a rock with flowers on it?", "label": 1}, {"captions": [" a house featuring a roof with truss system, framing, insulation, and a ceiling light.", " a small house on a hill in a field."], "sample_ids": ["39876e69e3914d99a07e0dc59611c5c0", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["roof, truss system, framing", "house, hill, field"], "captions_pred_pc": ["a black and white drawing of a window with dots all over it", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d model of the roof of a house", "a black and white image of a 
small house"], "question": "which house is on a hill?", "label": 1}, {"captions": ["a light pink leather handbag with a zipper on the side and a leather handle.", " a house with a blue roof, chimney, and wooden-beamed ceiling."], "sample_ids": ["3513ae49853d48609fdf8b26020d3c4f", "b380dd4800124a8d96424a504eb0ec6a"], "properties": ["color, handle, zipper", "roof, color, blue"], "captions_pred_pc": ["this image may contain bag accessory purse handbag clutch", "of a white lace clutch purse on a white background"], "captions_pred_image": ["a white handbag with a zippered compartment on the front", "a 3d model of a building with many windows"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" a black and white box-like object with various interpretations, such as a coffee table, building, book, and ceiling fixture.", " of two rocks with ice elements."], "sample_ids": ["404d7e2cd8894c31bdda02d2b3196464", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["black, white, coffee table", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white drawing of a square with dots on it", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a black and white 3d model of a building", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light.", " a long row of steel shelves in a warehouse, featuring a suspended scaffolding system."], "sample_ids": ["5ea0962b100b4fccb761ed84afe027b5", "578fe7a7bd754b889be33aea99cf5050"], "properties": ["house, table, chair", "a, material, steel"], "captions_pred_pc": ["above a black and white photograph of an open door", "above a black and white image of a rack with multiple shelves"], "captions_pred_image": ["a 3d rendering of a small white table with a chair", "a 3d model of a large metal 
structure"], "question": "which object is made of steel", "label": 1}, {"captions": [" a spiral staircase with a railing in a small building.", " of two rocks with ice elements."], "sample_ids": ["28cae056856c4a8ba9d1a6af5355f831", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["staircase, railing, building", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white photograph of a light switch", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a staircase in a white room", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a house featuring a pink roof with purple tiles, wooden beam ceilings, and interior elements including a bed with a purple frame and a table with purple slats.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["09561cc68a84496bb14b75c0f516f089", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["roof, color, purple, beams, wooden, bed, frame, table, slats, purple", "roof, structure, greenhouse"], "captions_pred_pc": ["a black and white image of a square with a pattern of dots", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a table with a grid pattern", "a 3d model of a building with a roof"], "question": "which roof structure has a greenhouse", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", " a house with a roof structure and toothbrushes."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "7632d1ba4e8144c19484c263b6074d0c"], "properties": ["hat, sword, gun", "house, roof, toothbrushes"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the 
other", "a black and white illustration of the letter 'b' isolated on a white background illustration"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d rendering of a white box with a lot of blades"], "question": "which entity has a roof structure", "label": 1}, {"captions": ["a 3d-printed green plastic cylinder with a hole in the middle.", "white plastic cone-shaped hat, 3d printed."], "sample_ids": ["9faa0c251d394f368f4f537ea21f977f", "5cfd092fb5c44c9f8ae62c6e4c62903d"], "properties": ["color, material, shape", "shape is cone, material is plastic, color is white"], "captions_pred_pc": ["a black and white image of a glittering object", "a black and white illustration of a dome shaped object made of dots"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a 3d model of a toilet paper holder"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a house with a pink roof, brick walls, and insulated ceiling.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["c8936ace72954650b4e2d84246964849", "b896a0898efe4059a776193c02132129"], "properties": ["roof, color, pink", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white drawing of a toilet", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a large building with a roof and windows.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["32d1fbd3ee91426882290305f70021e6", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["roof, windows, building", "island, terrain, water"], "captions_pred_pc": ["of a black and white photo of a diamond buckle", "a black and white photograph of a piece of paper with 
dots on it"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no.2", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a small island featuring a large building, trees, and a house on a hill, surrounded by a forest.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["d557c62e9be741a6b0f6b204d11a9c6f", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["house, hill, forest", "Wheels, laptop, robot"], "captions_pred_pc": ["above a black and white illustration of a small island in the middle of a body of water", "a black and white drawing of a cell phone"], "captions_pred_image": ["a black and white image of a small island in the middle of a body of water", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" a house with a roof structure and toothbrushes.", " of a house with a pink roof."], "sample_ids": ["7632d1ba4e8144c19484c263b6074d0c", "6162909df6294848a8eea83c3aa9585b"], "properties": ["house, roof, toothbrushes", "color, roof, pink"], "captions_pred_pc": ["a black and white illustration of the letter 'b' isolated on a white background illustration", "a black and white drawing of the letter 'p' on a white background illustration"], "captions_pred_image": ["a 3d rendering of a white box with a lot of blades", "a 3d model of a house in the style of the 1920s and 1930s"], "question": "which house has a roof that is a different color than the house?", "label": 1}, {"captions": [" of a green alien creature with long legs and a long tail.", "a 3d white cube with windows resembling a building."], "sample_ids": ["ad7c9475a4e24462bf6b5c24bcde317a", "4a07a5293f024bb0a353954a056ef626"], "properties": ["color, leg, tail", "- material is white- color is white- texture is textured"], "captions_pred_pc": ["a black and white image of a 
sword on a white background", "a black and white image of a square made up of small dots"], "captions_pred_image": ["a 3d model of an alien creature with a long tail", "a 3d model of a cube"], "question": "which entity is white", "label": 1}, {"captions": ["a 3d object featuring a rock, shell, piece of paper, and cat.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["53efab50e5a74e5ea165c763cea15be4", "7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["a, rock, paper", "water, boat, rock"], "captions_pred_pc": ["for a flock of birds in the sky", "a black and white illustration of a surfboard"], "captions_pred_image": ["a black and white image of a piece of paper in the shape of an island", "a 3d image of an animal laying on the ground"], "question": "which object is in the water?", "label": 1}, {"captions": ["three white paper windmills and a city model with a nativity scene silhouette.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["fa5ee6165f31465d9d75d046818f4006", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["windmills, silhouette, city model", "door, lock, handle"], "captions_pred_pc": ["a black and white photo of a pair of sunglasses", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of a cityscape on a white background", "a black and white image of a door with a crack in it"], "question": "which door has a lock", "label": 1}, {"captions": [" a green and black robot with cube-like features.", "a white of a man with arms outstretched."], "sample_ids": ["7d963adb841f47a39cf3386182b1ccf3", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["color, shape, material", "image, color, white"], "captions_pred_pc": ["a black and white image of a bottle with dots on it", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d model of a tower made from blocks", "a 3d 
model of a man with his arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": [" a small building with stairs and a glass floor, featuring a square table and a black square ceiling light.", " a large, ancient stone building, resembling a roman structure and a medieval castle."], "sample_ids": ["8aaad713b8834739b008ccf2f3d86cce", "f96abfafd34040a4bb09f5e2973403e9"], "properties": ["floor, table, light", "building, material, stone"], "captions_pred_pc": ["above a black and white photograph of a window", "a black and white drawing of the letter 'l' on a white background royalty free illustration"], "captions_pred_image": ["a black and white 3d model of a staircase on a platform", "3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosse"], "question": "which building is made of stone", "label": 1}, {"captions": [" a small pink stone pillar/column.", " a large white and metal building with a metal roof structure."], "sample_ids": ["bef329bb8d9f467cb86b258030dbf9ff", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["size, material, color", "roof, metal, white"], "captions_pred_pc": ["a black and white image of a small square with black dots", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a stone column on a white background", "a 3d model of a large white box"], "question": "which is larger", "label": 
1}, {"captions": [" of a purple laundry rack with multiple lines resembling wires.", " a long row of steel shelves in a warehouse, featuring a suspended scaffolding system."], "sample_ids": ["1f0dce1431a842b8bdb24ac4dd05f386", "578fe7a7bd754b889be33aea99cf5050"], "properties": ["color, shape, material", "a, material, steel"], "captions_pred_pc": ["a black and white image of a metal structure", "above a black and white image of a rack with multiple shelves"], "captions_pred_image": ["a 3d model of a wire rack on a white background", "a 3d model of a large metal structure"], "question": "which is made of metal", "label": 1}, {"captions": ["a 3d object featuring elements of a green and purple lamp, a cartoon character with a light bulb, an ice cream cone, a syringe, a light pole, a pen with a face, and a doctor's stethoscope.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["42d3657ce7bc4b5dbeb7f444e089a715", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["a lamp, a syringe, a light pole", "house, pool, balcony"], "captions_pred_pc": ["a spoon with black dots on a white background spoon with black dots on a white background, 3d illustration royalty free stock photo", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white 
background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": ["a 3d object featuring elements of a green and purple lamp, a cartoon character with a light bulb, an ice cream cone, a syringe, a light pole, a pen with a face, and a doctor's stethoscope.", " a wooden roof structure with a pink roof."], "sample_ids": ["42d3657ce7bc4b5dbeb7f444e089a715", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["a lamp, a syringe, a light pole", "roof, color, pink"], "captions_pred_pc": ["a spoon with black dots on a white background spoon with black dots on a white background, 3d illustration royalty free stock photo", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop", "a 3d model of the roof of a building"], "question": "which entity has a roof that is the color of pink", "label": 1}, {"captions": ["a black table lamp with a black shade and a hat on top.", " of a white spiral, earbuds, ceiling light, and silver ring with black and white scissors and design."], "sample_ids": ["31c00c8337de4854a20299d719136cce", 
"c69f60b389124ad9b4f81c64ec332054"], "properties": ["color, black, shade, black", "earbuds, light, ring"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a circular object on a white background royalty free illustration", "a black and white drawing of a needle and thread"], "captions_pred_image": ["a 3d model of a table lamp royalty free 3d model preview no.2", "a black and white illustration of a pair of sunglasses and a pair of scissors next to each other on a white background"], "question": "which entity has a light?", "label": 1}, {"captions": ["a 3d white arrow, letter s, toothpick, skateboard, and knife forming a logo.", "s of a cat, fish, person with a hat, pig with a green hat and swimsuit, and a green and pink bird."], "sample_ids": ["2ee9dcd863514073a849ece8ea7714dd", "402601779d1d4146b4cde106dfff1b27"], "properties": ["3D, toothpick, skateboard", "s, cat, fish, person, pig, bird"], "captions_pred_pc": ["above a black and white image of a person's hand holding a pencil", "above a black and white photo of a toy octopus on a white background"], "captions_pred_image": ["a white toothbrush on a gray background", "a snowflake in the air on a cloudy day"], "question": "which entity has a toothpick?", "label": 0}, {"captions": [" featuring a chair, table, and refrigerator.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["chair, table, refrigerator", "island, terrain, water"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": ["a 3d wooden pole with a metal spear handle.", "a silver ring with a swirly design and a 
white 3d printed sphere."], "sample_ids": ["1d5cf234576e41f0ba209c5e19d2db47", "8d81b384b5cc4f46a1779d0a2f5f7e27"], "properties": ["- material is wood, metal, metal", "color, silver, white"], "captions_pred_pc": ["of a black and white illustration of a vase on a pedestal", "a black and white illustration of a circle with dots"], "captions_pred_image": ["a black and white image of an old-fashioned pitchfork", "a 3d model of a silver ball on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": [" a red and silver metal plate with bolts and screws on it.", " of two rocks with ice elements."], "sample_ids": ["d4e4ead1ea144529b47e5882fc8a3a93", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["color, material, shape", "image is a rock with ice elements"], "captions_pred_pc": ["of a white square tile with a black hole in the center", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d rendering of a metal plate with screws and nuts", "a 3d image of two rocks on a gray surface"], "question": "which entity is a rock?", "label": 1}, {"captions": ["a featuring a ship, large rock, stone slab, ruined building, stone floor, small stone structure, triangular object, and a piece of concrete.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["d83c5a2fd61c4e9f927d1d7b7c9e5aae", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["ruined building, stone floor, small stone structure", "door, lock, handle"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a mountain", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of a piece of broken pottery", "a black and white image of a door with a crack in it"], "question": "which door has a lock", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " a robot with a blue, 
purple, and white body."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "6f98acb9e03c4cbd9c83f2c8f9cd3ddc"], "properties": ["color, table, ceiling", "body, color, white"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "above a black and white image of a robot"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d model of a robot standing in the middle of a white background"], "question": "which entity has a white body?", "label": 1}, {"captions": ["a white teddy bear, cloud, skull, octopus, and bird in various positions on a gray background.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["dd5849aced0443b1b4b38d413f7e06c4", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["background, color, white", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["a black and white image of a cat's head", "a black and white image of a circular object on a white background"], "captions_pred_image": ["a 3d model of an animal skull in white on a gray background", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": ["s of chimpanzee, human, and wolf skulls.", "a 3d object featuring a white tray with a decorative pattern, a silver tray with a bird, a laptop, a long knife, and a metal bar."], "sample_ids": ["16a6e8b5830b4da6828af63e91f75f9b", "b1099ba41d9f4af19d1a91761bb6074c"], "properties": ["s, chimpanzee, human, wolf", "Object, Tray, Tray"], "captions_pred_pc": ["a black and white photograph of a skull on a white background", "above a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a gorilla skull royalty free 3d model preview no.2", "a 3d image of a white tray with an intricate design"], "question": "which object is not a tray?", "label": 0}, 
{"captions": [" a house with a roof, roof truss, and suspended ceiling structure.", " a wooden house with a roof and framing."], "sample_ids": ["5abf69f79b92484fb54d41ff0c0a2c11", "4634a9bdf54549a99f68be77f1464b0a"], "properties": ["roof, truss, suspended ceiling", "roof, framing, material"], "captions_pred_pc": ["a suitcase made of dots on a white background a suitcase made of dots on a white background royalty free illustration", "a black and white drawing of an abstract pattern"], "captions_pred_image": ["a 3d model of a house with roof trusses", "a 3d model of a barn structure"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" a small house with a roof.", " a large white and metal building with a metal roof structure."], "sample_ids": ["0d2246e433ce4066b76489f17ba8d694", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["roof, house, small", "roof, metal, white"], "captions_pred_pc": ["a black and white square made up of small dots on a white background", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a house with a triangular roof", "a 3d model of a large white box"], "question": "which building has a metal roof", "label": 1}, {"captions": [" a house featuring a roof, floor plan, small bathroom, pool, and ceiling light fixtures.", " of a white plastic tube with a hole and a chip on it."], "sample_ids": ["6c6549e2975a48a1b59ebe2a6562900e", "9968e06a62e8487ea33460e640abc573"], "properties": ["floor plan, bathroom, pool", "color is white, material is plastic, shape is tube"], "captions_pred_pc": ["the floor plan of the house the floor plan of the house on a white background royalty free illustration", "a black and white image of a broom on a stand"], "captions_pred_image": ["a 3d model of a small house", "a white object on a gray background"], "question": "which object is not a house?", "label": 1}, {"captions": [" of a rock formation with a white cliff and a rock.", " of a stone 
wall with a window and a clock."], "sample_ids": ["4a25f6dfbea943bca137dacd2f7b984f", "46fc13a04c8b47fa86215b9efc2eb1f9"], "properties": ["image is rock formation with a white cliff and a rock", "window, clock, wall"], "captions_pred_pc": ["above a black and white map of spain on a white background", "a black and white illustration of a bullet in the middle of a starry sky"], "captions_pred_image": ["a black and white image of a rock formation on a gray background", "a 3d model of a brick wall"], "question": "which entity is a wall?", "label": 1}, {"captions": [" a chair with a yellow seat.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["a4de5f2055154465968f33d6289c64e6", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["color, yellow, seat", "roof, color, yellow"], "captions_pred_pc": ["above a black and white drawing of a square with small dots", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a chair on a white background royalty free 3d model no.", "a 3d model of a house with a roof"], "question": "which entity has a yellow roof", "label": 1}, {"captions": ["a small yellow and black helicopter flying in the air.", " a white and yellow metal shelving unit with a steel structure, yellow legs, and suspended ceiling system."], "sample_ids": ["7qxP6dQ5nNuaG8d0vswXXKnd0vq", "c1a7d264b34841409009b3d5d39d5b99"], "properties": ["color, yellow, black", "Steel, Color, Yellow"], "captions_pred_pc": ["a black and white illustration of an airplane", "a black and white illustration of a building"], "captions_pred_image": ["a black and white helicopter flying in the air", "a 3d model of a table with multiple tables and chairs"], "question": "which entity is made of yellow metal", "label": 1}, {"captions": ["a featuring a 
white and blue structure with a table, blue blocks, and suspended blue cubes.", " of a white human skull with broken bone elements."], "sample_ids": ["1d2cfe3a03004b62b17d3ce065658302", "6ca0f91b85464d7a845b3977351dd0b5"], "properties": ["color, table, blocks", "color, white, skull"], "captions_pred_pc": ["a group of people standing on top of each other on a white background a group of people standing on top of each other on a white background royalty free illustration", "a black and white image of a cat's x-ray"], "captions_pred_image": ["a 3d model of a table with four legs", "a 3d model of a human skull in white"], "question": "which entity is white", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " of a white chest of drawers with legs."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "f00dfa8b5e7e4fc6bbf97d718b66f390"], "properties": ["ceiling, light, desks", "chest of drawers, legs, white"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "of a black and white leopard print rug"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d rendering of a white dresser"], "question": "which object has legs", "label": 1}, {"captions": [" a white rocking chair with a curved backrest.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["ee0deb90abf943b6894cd5ded1331213", "6b745457e06840119058883b35f78f58"], "properties": ["backrest, curved, yes", "roof, color, blue"], "captions_pred_pc": ["a black and white illustration of a spiral pattern on a white background a black and white illustration of a spiral pattern on a white background royalty free illustration", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a white chair royalty free 3d model no. 
3", "a 3d model of a house with a steeple on top"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" of a row of insects and animals on cubes, with flowers and a snake on a stick.", " of a beige and white round soap dish/small bowl."], "sample_ids": ["fdecae38142646179e3e37d95e36ca50", "5414d75e47104589837f3df8b6de6d22"], "properties": ["a, b, c", "beige, white, round"], "captions_pred_pc": ["for a black square tile on a white background", "of a 3d model of a bracelet 3d model of a bracelet on a white background royalty free illustration 2019"], "captions_pred_image": ["a line of white cubes with various insects on them", "a white ceramic bowl sitting on top of a gray surface"], "question": "which object is white?", "label": 1}, {"captions": [" a small house with a roof.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["9578e8de15ec44ce802072aaa4df3910", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["roof, house, small", "house, pool, balcony"], "captions_pred_pc": ["above a black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white 
photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a modern house"], "question": "which house has a pool", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", "a yellow gold ring with an engraved quote, \"strength and love.\""], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "c155767db07340f2813c1b3dfa8d63b9"], "properties": ["hat, sword, gun", "ring, material, gold"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "of a black bangle bracelet on a white background"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a white wedding band with the words strength in weakness engraved on it"], "question": "which object is made of yellow gold", "label": 1}, {"captions": ["a 3d white axe, hammer, and spoon.", " of two rocks with ice elements."], "sample_ids": ["96d127abd21049689918e671ec613ef8", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["axe, hammer, spoon", "image is a rock with ice elements"], "captions_pred_pc": ["of a black lace belt on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d 
model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a small bedroom with wooden floors, walls, roof, and shelf.", " a large steel building with many columns and a pool."], "sample_ids": ["e602ac60041f4b4f84c044161e478781", "2ce649a4152a45bab60d8cafa1dcdeb3"], "properties": ["floor, wall, roof", "building material, pool, steel"], "captions_pred_pc": ["above a black and white image of a decorative metal bar", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a room with wooden walls and a rug on the floor", "a 3d model of a concrete structure"], "question": "which building has a pool", "label": 1}, {"captions": ["a gray background featuring a white line in the middle.", " a building with yellow lines."], "sample_ids": ["47f89f92bef14b7193d0ffa3934f6977", "f18e34286cf54876874f55ecc9018492"], "properties": ["color, line, gray", "color, yellow, lines"], "captions_pred_pc": ["above a black and white image of a piece of furniture", "a black and white drawing of a map"], "captions_pred_image": ["an airplane flying in the sky with the sun shining behind it", "a drawing of an airplane flying over a city"], "question": "which entity has lines that are yellow?", "label": 1}, {"captions": ["a featuring a rock formation with various statues, including a woman, an eagle, and elements like wood and a shell.", "a featuring a boat, table, chairs, umbrella, and solar panel."], "sample_ids": ["36d90269173b4d1a84dbd61664593f66", "0f0eb3a198d341d28f809b6d7634be8a"], "properties": 
["a, eagle, wood", "boat, table, chairs, umbrella, solar panel"], "captions_pred_pc": ["a black and white illustration of a map with dots all over it", "a black and white illustration of a boat with an umbrella"], "captions_pred_image": ["a 3d model of a person sitting on the edge of a cliff", "a 3d model of a boat, a table, chairs, and an umbrella"], "question": "which entity has a solar panel?", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", " a white plastic container with a lid, a small box, a cup, a bottle, and a jar."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "20a02705a66f460492e07345e84a62ed"], "properties": ["color, material, structure", "a box, a cup, a bottle, a jar"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white pattern of dots on a white background"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d model of a plastic bottle, a plastic cap, and a plastic container"], "question": "which entity is not a building structure?", "label": 0}, {"captions": [" a small building with a staircase in a room.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["5a5b80af609a42acaebfdd086ae54336", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["room, staircase, building", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white image of the letter l", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a staircase on a white surface", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": ["s of a rock, boat, plane, and leaf on a stick.", " a small house on an island with trees, shrubs, a pool, and a lake."], "sample_ids": ["be0884a7ced34b3d92687b6087798a1e", "c8331489fca44685bedfa1bdadf6ccb3"], 
"properties": ["s, stick, leaf", "house, lake, pool"], "captions_pred_pc": ["above a black and white drawing of an object floating in the sky", "a black and white image of a pattern on a piece of paper"], "captions_pred_image": ["a black and white photograph of a rock on a sandy surface", "a 3d model of a large building"], "question": "which entity has a pool", "label": 1}, {"captions": [" a brick building with a roof structure and roof truss.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["84e8acad28664a738df69d719df9e263", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["roof, structure, truss", "mountainous, landmass, state"], "captions_pred_pc": ["a black and white polka dots pattern on a white background polka dots pattern on a white background illustration", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d model of a brick building with a roof", "a 3d model of a piece of paper"], "question": "which entity is not a state?", "label": 1}, {"captions": [" a small house with a roof.", " a wooden roof structure with a pink roof."], "sample_ids": ["9578e8de15ec44ce802072aaa4df3910", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["roof, house, small", "roof, color, pink"], "captions_pred_pc": ["above a black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a 
small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of the roof of a building"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner.", " a house with a roof and beams."], "sample_ids": ["e8100bef7b8a48d4ac79684bffb349ba", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["Wheels, laptop, robot", "roof, beams, house"], "captions_pred_pc": ["a black and white drawing of a cell phone", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a black and white image of a suitcase on wheels", "a 3d model of a building with a roof"], "question": "which entity has a roof and beams", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", "a white of a man in a suit and mask, possibly a diving suit."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "205251e4277e41d1aae6b2358267ad56"], "properties": ["room, bed, desk", "image, color, white"], "captions_pred_pc": ["a black and white drawing of a door", "a black and white image of a beetle on a white background"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a 3d 
printed figurine of an alien creature"], "question": "which image is white", "label": 1}, {"captions": [" a submarine with a person on it.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["ab3cef03ce384d5e9175ecf664d5d536", "b896a0898efe4059a776193c02132129"], "properties": ["person, submarine, surface", "- material is stone, metal, concrete"], "captions_pred_pc": ["of a black and white mushroom on a white background", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a person riding a surfboard royalty free 3d model", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a set of yellow and black tools, including a magnifying glass, hexagonal keys, and screwdrivers.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["40285a60f32749a8ae38957c7b073fe8", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, yellow, black", "island, terrain, water"], "captions_pred_pc": ["a set of three black and white keychains on a white background", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a pair of hexagonal keys and a pair of hexagonal scissors royalty free 3d model preview no 3", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" of a white building with a small house and a desk with a laptop.", " a yellow toy truck with large tires and a load of sticks."], "sample_ids": ["9244a2d3a9e94c8398ef991f1661bb58", "4e82177059204292899fa7415164a05e"], "properties": ["a, desk, laptop", "yellow, tires, load"], "captions_pred_pc": ["a black and white image of a piece of furniture", "in 15 words or less the image is of a toy truck made of glass beads stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d 
model of an office desk on a white background", "a 3d model of a monster truck with spikes"], "question": "which entity has a load of sticks", "label": 1}, {"captions": [" a building with yellow and white columns, wooden floor, and a ceiling featuring numerous yellow poles.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], "sample_ids": ["1cf4b8f4e6014d36b6537c6ef52ccb96", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["floor, ceiling, poles", "building, room, sky"], "captions_pred_pc": ["a black and white drawing of a square with dots on it", "above a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a room?", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["color, white, black, white", "table, staircase, light"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of a table with a staircase"], "question": "which object has a light?", "label": 1}, {"captions": ["a 3d white cube featuring a hole, wheels, and a diamond.", " of a white rock-like object, possibly a shell or ice."], "sample_ids": ["e44009d33258425e8efedfbc6823bf70", "096e42b466ec438d95c5d89a85191534"], "properties": ["- color is white- shape is cube- material is 
plastic", "white, rock, shell"], "captions_pred_pc": ["for a black and white image of a toothbrush in the shape of a toothbrush", "in one hundred words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words"], "captions_pred_image": ["a 3d model of a white cube", "a 3d model of a white rock on a gray background"], "question": "which object is whiter", "label": 1}, {"captions": ["small 3d white boat model", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["b06d2d878ad2429498baa5c157dc6080", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["size is small, color is white, model is boat", "roof, color, yellow"], "captions_pred_pc": ["the product in the image", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a white rowing boat on a gray background", "a 3d model of a house with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" a white box with colored buttons on it.", " a wooden shed with a gray roof."], "sample_ids": ["5a5269e17d134e238ec2b256405d8c10", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["color, shape, material", "roof, color, gray"], "captions_pred_pc": ["in 15 words or less a black and white patterned scarf on a white 
background royalty free illustration", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white box with two buttons", "a 3d model of a shed with a gray roof"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "f178fb523ad7421aaa90a92ee736ee00"], "properties": ["a box, a cup, a bottle, a jar", "bedroom, bathroom, bed"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d model of a small room with a bed, desk, and chair"], "question": "which entity has a bathroom", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " of a person breaking through a brick wall."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "0708be2186d24d52815e8ac9d751fc37"], "properties": ["roof truss, insulation, suspended ceiling", "image, brick, wall"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "for a black and white illustration of an ice cream cone"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a person breaking through a brick wall"], "question": "which image shows a person breaking through a brick wall?", "label": 1}, {"captions": ["a featuring white and red cubes, and a pink and white chair.", "royalty-free of a golden gramophone with a wooden base and umbrella."], "sample_ids": 
["f2c44a82ba744ba8b93e9a1c2272c117", "90bd720f583c4130a6273f5a94f6ae69"], "properties": ["color, white, red, pink", "image is royalty-free, gramophone, base"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white illustration of a water droplet in the shape of a snowflake"], "captions_pred_image": ["a 3d model of a white structure with stairs", "a 3d model of an antique gramophone"], "question": "which image has a wooden base", "label": 1}, {"captions": ["a red toy robot cowboy with a hat and glasses.", "a 3d low poly model of a hammer and an axe with wooden handles."], "sample_ids": ["83a910fb2e714b7082fb7606fce83dc4", "c4e45a41478e42418399074b88d8920f"], "properties": ["color, hat, glasses", "axe, handle, wood"], "captions_pred_pc": ["a black and white image of a brake pad", "for a black and white image of a bird's wing"], "captions_pred_image": ["a 3d model of a cowboy hat on top of a vending machine royalty free 3d model no.2", "a 3d model of a knife on a white background"], "question": "which object has a wooden handle", "label": 1}, {"captions": [" a wooden table with black and white square design and metal poles.", " of two rocks with ice elements."], "sample_ids": ["ddff57e7f60e420688b477a1f78aa83f", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["design, poles, table", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white drawing of a square with dots", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a table with four legs and black and white stripes", "a 3d image of two rocks on a gray surface"], "question": "which entity is a photograph?", "label": 1}, {"captions": [" of a slice of bread and knife on a cutting board.", " a small house with stairs and a roof."], "sample_ids": ["0d5f5baa97754547ad517b694ea8edc7", "e9305c80010f4e3b9de9789f01a9bee5"], "properties": ["bread, knife, board", "roof, stairs, house"], "captions_pred_pc": 
["above a black and white illustration depicting a galaxy with a black hole in the center", "above a black and white image of a square with dots all over it"], "captions_pred_image": ["a loaf of bread and a knife on a cutting board", "a 3d rendering of a podium on a wooden floor"], "question": "which object has a roof", "label": 1}, {"captions": [" a staircase with a railing, table, and chair, featuring a square ceiling light.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["51a0fba79bce472a8b827b78a110b77f", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["stair, table, chair", "house, pool, balcony"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a staircase in a room", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": [" of a blue security booth featuring a locked door and a \"clean way\" inscription.", " of two rocks with ice elements."], "sample_ids": ["d2ba0001656f477787b7df4567346a2f", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["color, inscription, booth", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white pattern of dots on a white background a black and white pattern of dots on a white background royalty free stock illustration", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d rendering of a gray metal box with an open door", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": ["a featuring a skeleton, torn paper, long stick, rock, and broken wood.", "a featuring a skeleton, torn paper, long stick, rock, and broken wood."], "sample_ids": ["46903bf029934b1989bc062dcb0a5531", "46903bf029934b1989bc062dcb0a5531"], "properties": ["skeleton, torn, paper, long stick, rock, 
broken wood", "skeleton, torn, paper, long stick, rock, broken wood"], "captions_pred_pc": ["a close up of a black object on a white background", "a close up of a black object on a white background"], "captions_pred_image": ["a 3d sculpture of a person's hand in the air royalty-free 3d model preview", "a 3d sculpture of a person's hand in the air royalty-free 3d model preview"], "question": "which entity has a skeleton", "label": 0}, {"captions": [" a staircase with a railing, table, and chair, featuring a square ceiling light.", " a wooden roof structure with a pink roof."], "sample_ids": ["51a0fba79bce472a8b827b78a110b77f", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["stair, table, chair", "roof, color, pink"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of a staircase in a room", "a 3d model of the roof of a building"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a white crocodile", "a featuring a green frog face, bunny head, dragon head, flower, monster with a tail, and a monster mask with an open mouth."], "sample_ids": ["525e4c1c86564ea287acbe93397d6d91", "127753bf17de4252aaa7ea88f274545e"], "properties": ["color, shape, size", "face, mask, tail"], "captions_pred_pc": ["a black and white image of a bird flying in the sky", "a 3d model of an orchid flower on a white background 3d model of an orchid flower on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a piece of white paper", "a 3d model of a goat's head"], "question": "which entity has a mask?", "label": 1}, {"captions": ["a white 3d printed mickey mouse dice with various numbers and symbols on it.", "s of a boat, bird, paper airplane, and kite flying in the air."], "sample_ids": ["e2645ac544844f3c981203134a99c30c", "795cfa41d48a4cfc893ff1981318594d"], "properties": ["- material is plastic- shape is dice- color is 
white", "s, boat, bird, airplane, kite"], "captions_pred_pc": ["a circle of dots with the number 2 in the center", "above a 3d illustration of a boy standing with his arms outstretched"], "captions_pred_image": ["a 3d printed white dice with a mickey mouse face", "a white kite flying in the air against a gray background"], "question": "which entity is not a kite?", "label": 1}, {"captions": [" a blue cityscape with water elements, featuring a lake and a boat.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["6b4e3485993048ae86301a9720782751", "b896a0898efe4059a776193c02132129"], "properties": ["color, lake, boat", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white silhouette of an island on a white background", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of an island in the middle of the ocean royalty free 3d model preview no.2", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": ["a featuring a chair, a building, a glass tower, and a throne made of money, with various unique elements such as a green roof and a computer chip.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["18d2e75f23474d7489a6d7d605dfc76d", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["throne, chair, building", "island, terrain, water"], "captions_pred_pc": ["a black and white illustration of a person sitting on a bench", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a building on top of a table", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", " a large white and metal building with 
a metal roof structure."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["color, material, structure", "roof, metal, white"], "captions_pred_pc": ["a black and white drawing of a room with dots", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d model of a large white box"], "question": "which building has a white roof", "label": 1}, {"captions": [" a white origami bird with silver wings.", " a black and purple dragon with wings and purple eyes."], "sample_ids": ["d9574287572c47f7a2b712cf1d919374", "9fdaa7bf7dbe499482d10705cbe366d2"], "properties": ["color, white, wings", "Eye color, Black, Purple"], "captions_pred_pc": ["a 3d model of the human body", "a black and white illustration of an airplane on a white background"], "captions_pred_image": ["a 3d model of a futuristic bird in flight on a gray background", "a 3d model of a dragon with wings"], "question": "which entity has eyes that are black and purple?", "label": 1}, {"captions": [" a blue circuit board with electronic components.", " the island of greece, featuring a small fish, a piece of ice, a tree branch, and a mossy piece of wood."], "sample_ids": ["4816a2780af54492b6692fd78347f1ac", "ee440fcbc493488e879539d37a8d820e"], "properties": ["color, blue, components", "island, fish, ice"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench", "above a black and white image of a mountain on a white background"], "captions_pred_image": ["a 3d printed circuit board with various electronic components", "a 3d map of the country of israel"], "question": "which entity is not a circuit board?", "label": 1}, {"captions": [" a staircase with a glass railing and a small white table, featuring a ceiling light.", " of a white plastic tube with a hole and a chip on it."], "sample_ids": ["10b899daca25493cba6bfffbbe7990fe", 
"9968e06a62e8487ea33460e640abc573"], "properties": ["railing, glass, table", "color is white, material is plastic, shape is tube"], "captions_pred_pc": ["above a black and white photograph of a cell phone", "a black and white image of a broom on a stand"], "captions_pred_image": ["a 3d rendering of a staircase with a glass railing", "a white object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" of a small white building with stairs and a lid.", " of a small white building with stairs and a lid."], "sample_ids": ["6ba301c579fa465fa454fe7487eb70cb", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["building, stairs, lid", "building, stairs, lid"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white box on a gray background", "a 3d model of a white box on a gray background"], "question": "which building has a lid?", "label": 0}, {"captions": [" a large building featuring stairs, a clock tower, a balcony, and a roof.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["e7c78316f9cb4b8aad57a9c933f5278b", "6b745457e06840119058883b35f78f58"], "properties": ["building, roof, balcony", "roof, color, blue"], "captions_pred_pc": ["a black and white illustration of a group of dots in the shape of a square on a white background royalty free illustration", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a building with a clock tower", "a 3d model of a house with a steeple on top"], "question": "which building has a roof that is blue?", "label": 1}, {"captions": [" of a rock formation with a white cliff and a rock.", " a clay pot with holes in it."], "sample_ids": ["4a25f6dfbea943bca137dacd2f7b984f", 
"8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["image is rock formation with a white cliff and a rock", "hole, material, clay"], "captions_pred_pc": ["above a black and white map of spain on a white background", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a rock formation on a gray background", "a clay sculpture of a face with holes in it"], "question": "which entity is made of clay", "label": 1}, {"captions": ["a small yellow and black helicopter flying in the air.", " a city with buildings, houses, trees, and grass."], "sample_ids": ["7qxP6dQ5nNuaG8d0vswXXKnd0vq", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["color, yellow, black", "buildings, houses, grass"], "captions_pred_pc": ["a black and white illustration of an airplane", "in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a black and white helicopter flying in the air", "an image of a pile of trash on the ground"], "question": "which entity has more grass", "label": 1}, {"captions": ["yellow and white 3d corn on the cob model resembling a toothbrush.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["1fd0e7ffe26349da89815a6d4d6a189a", "a17477b445b3443189dad22f768b888b"], "properties": ["color, shape, material", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white illustration of a circle made up of tiny dots", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d 
model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": ["a red circle, red and white arrow, mouse, and child's handwriting.", " a house with roof trusses and wooden beams on a suspended ceiling."], "sample_ids": ["3c233f87bf264968a7f0660b9eac9e4a", "3c2e3a3670b042069bd8290e2c357702"], "properties": ["red, mouse, handwriting", "roof trusses, beams, suspended ceiling"], "captions_pred_pc": ["in 15 words or less a black and white image of a person's hand holding a pencil and drawing on a piece of paper", "above a black and white drawing of a building"], "captions_pred_image": ["a black and white drawing of a person's hand holding a pencil", "a 3d model of a house with a roof in progress royalty free 3d model preview no. 
1"], "question": "which entity has a suspended ceiling", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", " a four-legged metal workbench with shelves."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "e93b633d477942d9b79ef8ab566473d6"], "properties": ["base material is wood, color is red, message is welcome to northwich", "Four legs, Metal, Shelf"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "for a black and white illustration of a cross"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a table with four legs"], "question": "which object is made of metal", "label": 1}, {"captions": ["a watch with a blue and black dial, black and white face, grey strap, and metal band.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["625b0a764d0f40d48f3140acdd644823", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["dial, face, band", "camera, speaker, ceiling fan"], "captions_pred_pc": ["for a person wearing a black and white t-shirt with a logo on it", "for a black and white image of an object on a white background"], "captions_pred_image": ["a close-up view of the back of a watch strap on a gray surface", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": ["smiley-faced banana .", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["cc0099a687194a31a052ac761f5fdfea", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["face is smiling, banana is yellow, smiley face is a sticker", "box, paper clip, lock"], "captions_pred_pc": ["above a black and white image of a surfboard on a white background", "above a black and white image of a clock in the shape of a spiral"], "captions_pred_image": ["a 3d model of a banana with a smiling face", "a 3d model of 
a stapler with a staple in it royalty free 3d model no."], "question": "which object has a lock?", "label": 1}, {"captions": [" a white bunk bed with a ladder.", " of two rocks with ice elements."], "sample_ids": ["379f488d0624482694bbe150b7bc1059", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["bed, ladder, color, white", "image is a rock with ice elements"], "captions_pred_pc": ["for a white square on a white background with a black square in the center", "a black and white image of two rocks on a white background"], "captions_pred_image": ["the bunk bed royalty free 3d model no. 3", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": ["a featuring a train, large ship, long metal pipe, boat, black map of kenya, and a black piece of plastic.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["49c5a9d42ba64fb2b681ef583d700b98", "c3a82df41875402285608ef13a55df57"], "properties": ["a train, a ship, a boat", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["above a black and white image of a long, curved line on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a submarine", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": ["white s of a wall-mounted light, toilet with handle, faucet, and lamp with a light bulb.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["92052c493bf141a08b56f30f9c5e2d61", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["light, toilet, faucet", "house, pool, balcony"], "captions_pred_pc": ["in 15 words or less a 3d illustration of an object made of dots on a white background 3d illustration of an object made of dots on a white background royalty free illustration", "above a black and white image of a room with a lot of 
dots"], "captions_pred_image": ["a white plastic toilet paper holder on a gray background", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": [" a white rock with green grass and moss on it.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["color, grass, moss", "table, staircase, light"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a 3d model of a table with a staircase"], "question": "which object is in a room?", "label": 1}, {"captions": ["a collection featuring a broken egg in a bowl, food wrapped in paper, torn and whole paper pieces, a white paper hat, a skull with a hat, a snowy iceberg, and a piece of cake.", " a large metal building with a roof and truss structure."], "sample_ids": ["3d7bd392c9a14f4ab7c0aae2cf75b487", "b85a99699ccd4bcba213322113bb253d"], "properties": ["hat, food, bowl", "roof, truss, structure"], "captions_pred_pc": ["in 15 words or less a black and white image of dots on a white background", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of the earth with a hole cut out of it", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a celtic stone cross.", " a pink, ear-shaped object."], "sample_ids": ["95c9f6a3061c4850be22339959ac70d6", "0e08d777c7b948a784dc15748e7b173f"], "properties": ["- material is stone- shape is cross- color is black", "shape is ear, color is pink, material is plastic"], "captions_pred_pc": ["for a black and white image of a watch", "a black and white illustration of a rock on a white background"], "captions_pred_image": ["a 3d 
model of a celtic cross gravestone royalty free 3d model preview no.3", "a 3d model of a white object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a featuring a train, large ship, long metal pipe, boat, black map of kenya, and a black piece of plastic.", " a small house with a tree, pool, and pond in a green and blue landscape."], "sample_ids": ["49c5a9d42ba64fb2b681ef583d700b98", "e22489ba182f45cb81f0a83f22abe9bd"], "properties": ["a train, a ship, a boat", "house, tree, pool"], "captions_pred_pc": ["above a black and white image of a long, curved line on a white background", "in 15 words or less a black and white image of a square with dots around it"], "captions_pred_image": ["a 3d model of a submarine", "a 3d model of a house and a tree in a box royalty free 3d model preview no.3"], "question": "which entity has a pool", "label": 1}, {"captions": ["a featuring a teddy bear on a rainbow, accompanied by a baby, a cat, a dog, and a drink, with a pink and green sign displaying \"close the sky over dreams.\"", " a city with buildings, houses, trees, and grass."], "sample_ids": ["80dfbe37b3d74f11b712ca1ad6570f70", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["image, color, pink", "buildings, houses, grass"], "captions_pred_pc": ["above a black and white photograph of a dog in a bowl", "in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d sculpture of an animal on a piece of paper", "an image of a pile of trash on the ground"], "question": "which entity has more grass", "label": 1}, {"captions": [" a small house on an island with trees, shrubs, a pool, and a lake.", " a large, black and white circular building, resembling a stadium or ring structure."], "sample_ids": ["c8331489fca44685bedfa1bdadf6ccb3", "67f46bb0048244c687a58d1017a08f6b"], "properties": ["house, lake, pool", "building, color, black and white"], 
"captions_pred_pc": ["a black and white image of a pattern on a piece of paper", "the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustr"], "captions_pred_image": ["a 3d model of a large building", "a 3d model of a circular fence with black and white stripes"], "question": "which building is black and white?", "label": 1}, {"captions": [" of a wooden nightstand with a hexagonal pattern, white top, and drawer.", " a small house with a blue roof, a door, and a pool."], "sample_ids": ["e6f2dbec6d464b4da4aa47bce242f6e5", "40c52c2d278345c5b4e8d00a991271dc"], "properties": ["Drawer, Pattern, White", "door, roof, pool"], "captions_pred_pc": ["a cross made of dots on a white background vector illustration 
of a cross made of dots on a white background royalty free illustration illustration of a cross made of dots on a white background vector illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a", "of a black and white photo of a window with fringes"], "captions_pred_image": ["a 3d rendering of a wooden box with a handle", "a 3d model of a small house"], "question": "which entity has a door?", "label": 1}, {"captions": ["a white plastic bottle with a lid and cap.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": ["f7e60e3a8ee84ad0954d288c3f1a7220", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["color is white, material is plastic, shape is bottle", "hat, candy, strawberry"], "captions_pred_pc": ["of a black circular object on a white background", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d 
printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a", "a black and white image of a person wearing a hat"], "question": "which object is made of candy", "label": 1}, {"captions": [" of a wooden staircase with marble floor and wooden railings in a house.", " a small island featuring a large building, trees, and a house on a hill, surrounded by a forest."], "sample_ids": ["ee70964ce5e841bd87381cff40d59b88", "d557c62e9be741a6b0f6b204d11a9c6f"], "properties": ["floor, staircase, railings", "house, hill, forest"], "captions_pred_pc": ["a black and white drawing of a light switch", "above a black and white illustration of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d model of a staircase on a marble surface", "a black and white image of a small island in the middle of a body of water"], "question": "which house is on a hill", "label": 1}, {"captions": [" a molecule featuring green, red, and blue spheres.", " a four-legged metal workbench with shelves."], "sample_ids": ["1c0e821eb7c4489dbff9e20d7e8575a3", "e93b633d477942d9b79ef8ab566473d6"], "properties": ["color, sphere, molecule", "Four legs, Metal, Shelf"], "captions_pred_pc": ["a black and white photograph of a group of geometric shapes arranged in the shape of a diamond", "for a black and white illustration of a cross"], 
"captions_pred_image": ["a 3d model of a molecule in the shape of a pyramid", "a 3d model of a table with four legs"], "question": "which entity has four legs", "label": 1}, {"captions": ["a pair of green sneakers with orange laces and star designs.", " a pink-framed building structure with beams and trusses."], "sample_ids": ["8cf3790e236a4d9ebe21b028646792b2", "18e392c5360146eda498c5edab25b15c"], "properties": ["color, green, orange, star", "frame, beams, trusses"], "captions_pred_pc": ["a black and white photo of a pair of shoes", "a black and white drawing of a metal grate"], "captions_pred_image": ["a pair of converse sneakers on a white background with stars surrounding the shoes", "a 3d model of a building under construction"], "question": "which entity has a frame?", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", "a white of a city with buildings and a gold spoon."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "2351471a2d2145c59fec5f68ffae4816"], "properties": ["apse, roof, floor plan", "image, city, spoon"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white image of a diamond shaped piece of fabric"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a city skyline in white"], "question": "which image shows a city with buildings and a gold spoon?", "label": 1}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " a large house with a roof on a platform."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "cb3e09a301b746918a682a595037c7f7"], "properties": ["roof, color, blue", "roof, platform, house"], "captions_pred_pc": ["of a white lace clutch purse on a white background", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a building with many windows", "a 3d model of a small house"], "question": "which house has a roof on a platform", 
"label": 1}, {"captions": [" of a small clay pot/bowl with a stone and brick variation, accompanied by a piece of bread.", "a featuring a skeleton, torn paper, long stick, rock, and broken wood."], "sample_ids": ["936714802d2849bea3efb1eb5c83cee6", "46903bf029934b1989bc062dcb0a5531"], "properties": ["variation, bread, pot", "skeleton, torn, paper, long stick, rock, broken wood"], "captions_pred_pc": ["above a black and white drawing of an object", "a close up of a black object on a white background"], "captions_pred_image": ["a 3d model of an ancient pottery bowl", "a 3d sculpture of a person's hand in the air royalty-free 3d model preview"], "question": "which entity has a skeleton?", "label": 1}, {"captions": [" of a tree with leaves and roots.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["1c8dbf1b79c44cd09369d54897402528", "6b745457e06840119058883b35f78f58"], "properties": ["leaf, root, trunk", "roof, color, blue"], "captions_pred_pc": ["a black and white illustration of a tree on a white background", "a black and white image of a building with dots"], "captions_pred_image": ["a black and white image of a tree with roots", "a 3d model of a house with a steeple on top"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" a house featuring a roof, floor plan, small bathroom, pool, and ceiling light fixtures.", " a small white building with a door, resembling a box-shaped house."], "sample_ids": ["6c6549e2975a48a1b59ebe2a6562900e", "1b5fe88d0ff149ae9d8b4eb455c5c90c"], "properties": ["floor plan, bathroom, pool", "shape is box, color is white, door is present"], "captions_pred_pc": ["the floor plan of the house the floor plan of the house on a white background royalty free illustration", "a black and white image of a person standing in front of a white background"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a white, open shelving unit"], "question": "which entity is a building?", 
"label": 1}, {"captions": [" a modern orange leather swivel recliner chair and ottoman.", " a damaged desert house with an open door and a cross, surrounded by sand and dirt."], "sample_ids": ["943ae5d90d654498912d0d3d0114ba35", "95e9d8f8ba3849feba93c2a7c7932a8c"], "properties": ["color, orange, leather", "house, door, cross"], "captions_pred_pc": ["a black and white illustration of a chair", "above a black and white image of a map"], "captions_pred_image": ["a white leather lounge chair and ottoman", "a black and white drawing of a house"], "question": "which object has a door", "label": 1}, {"captions": [" a house with roof trusses and wooden beams on a suspended ceiling.", " a destroyed car with rusted, broken metal and torn paper."], "sample_ids": ["3c2e3a3670b042069bd8290e2c357702", "3fe31c3bf5cd4574a8ca02222411a988"], "properties": ["roof trusses, beams, suspended ceiling", "metal, rusted, paper"], "captions_pred_pc": ["above a black and white drawing of a building", "a black and white drawing of a person sitting in a chair"], "captions_pred_image": ["a 3d model of a house with a roof in progress royalty free 3d model preview no. 
1", "a black and white image of a piece of debris on the ground"], "question": "which entity is more likely to have rusted metal", "label": 1}, {"captions": [" a small white closet-like structure with a door, resembling a kitchen cabinet or miniature house.", " a small house with stairs and a roof."], "sample_ids": ["37954d951eb24f23a4956df5a683bb92", "e9305c80010f4e3b9de9789f01a9bee5"], "properties": ["door, white, structure", "roof, stairs, house"], "captions_pred_pc": ["a black and white drawing of a house made up of tiny dots", "above a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d model of a modern, minimalist bathroom", "a 3d rendering of a podium on a wooden floor"], "question": "which structure has a roof?", "label": 1}, {"captions": [" a small white house with windows and a black lid.", " of two rocks with ice elements."], "sample_ids": ["4b40af369c1149949f5ccb68becd8430", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["white, windows, lid", "image is a rock with ice elements"], "captions_pred_pc": ["above a black and white image of dots on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a white house with three windows", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a small house with a pink and blue roof and a white ceiling featuring wooden beams.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["443554d4d7044c66aa8cbff63c737589", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["roof, color, pink, blue", "house, roof, wooden"], "captions_pred_pc": ["a black and white square with dots all over it", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a roof with a triangular design royalty-free 3d model preview no.2", "a black and white photograph of 
a birdhouse"], "question": "which house has a wooden roof", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "c3a82df41875402285608ef13a55df57"], "properties": ["- color is red, blue, pink", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["for a black and white image of an object on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" of a chessboard on a beige ceiling tile.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["7dccc542a68f470a947bf5f698c27bbd", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["beige, tile, chessboard", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white image of a leopard print banner", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["3d chess board royalty free 3d model preview no 2", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": ["a 3d white object resembling a sphere, frog, egg, hat, and shell.", " a small wooden cabinet with a drawer, doubling as a bedside table."], "sample_ids": ["0a8e0b95d8ce43ee9159ad01d925aad8", "3755f3c19ae549c4bf708462db1b2581"], "properties": ["shape is sphere, color is white, material is plastic", "Cabinet, drawer, wood"], "captions_pred_pc": ["a black and white illustration of a sponge in the shape of a sponge sponge black and white illustration of a sponge in the shape of a sponge royalty free illustration", "a black and white image of a square with dots all 
over it"], "captions_pred_image": ["a 3d sculpture of an apple on a white background", "a 3d model of a wooden box with a lid"], "question": "which object is made of wood", "label": 1}, {"captions": [" a house featuring a roof, floor plan, heating system, and ceiling light fixture.", " a small white house with stairs and a spiral staircase, featuring a white table and ceiling light."], "sample_ids": ["269939560e08432f9e134309b9a1c587", "e9e1cc7fae22458197a61f43a9c355f4"], "properties": ["floor plan, heating system, ceiling light fixture", "house, staircase, table"], "captions_pred_pc": ["a black and white drawing of a house", "above a black and white photograph of a dog in a frame"], "captions_pred_image": ["a 3d model of a building with a glass facade", "a 3d model of a small house with a spiral staircase"], "question": "which house has a staircase?", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " a white bunk bed with a ladder."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "379f488d0624482694bbe150b7bc1059"], "properties": ["bed, desk, window", "bed, ladder, color, white"], "captions_pred_pc": ["of a black and white drawing of a curved line", "for a white square on a white background with a black square in the center"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "the bunk bed royalty free 3d model no. 
3"], "question": "which bed has a ladder", "label": 1}, {"captions": [" a purple sphere with polka dots, flowers, and a hole, containing a pink ball inside.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["1000e3065aaa4d6fb93cea89b99e1748", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["color, shape, material", "roof, color, yellow"], "captions_pred_pc": ["a black and white image of a dotted circle on a white background royalty free illustration", "a black and white drawing of a room"], "captions_pred_image": ["a 3d model of an object with a ball in the center", "a 3d model of a table and chairs on a white background"], "question": "which entity has a roof that is the color of yellow", "label": 1}, {"captions": [" of a broken stone wall featuring an angel sculpture.", " a clay pot with holes in it."], "sample_ids": ["aae2c42740a04fd68068f5707111d26f", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["image is a sculpture of an angel on a wall", "hole, material, clay"], "captions_pred_pc": ["a black and white image of a toilet paper roll", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a marble sculpture of an angel", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": [" of a white plastic tube with a hole and a chip on it.", " of a stone wall with a window and a clock."], "sample_ids": ["9968e06a62e8487ea33460e640abc573", "46fc13a04c8b47fa86215b9efc2eb1f9"], "properties": ["color is white, material is plastic, shape is tube", "window, clock, wall"], "captions_pred_pc": ["a black and white image of a broom on a stand", "a black and white illustration of a bullet in the middle of a starry sky"], 
"captions_pred_image": ["a white object on a gray background", "a 3d model of a brick wall"], "question": "which object is made of stone", "label": 1}, {"captions": [" a small black, white, and orange walkie-talkie.", "a 3d lego character wearing a black helmet, gloves, and holding a gun."], "sample_ids": ["e098706085a44898abbd549693d12a64", "f71842519d2f44ceb9fa57e0c0db4aa0"], "properties": ["color is black, white, orange", "- color is black- material is plastic- shape is 3d"], "captions_pred_pc": ["of a white and black clutch with a flower design", "a sculpture in the form of a skull made of silver dots on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d model of a walkie-talkie royalty free 3d model preview no 3", "a lego ninja turtle wearing a black helmet and gloves"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a tan, cylindrical hat with a hole in it."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "00c350e9f5f24fcd8bcc378da2963d4c"], "properties": ["color, white, black, white", "hat, color, tan"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white drawing of a mushroom on a white background royalty free illustration"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of an object with a hole in it"], "question": "which hat is tan", "label": 1}, {"captions": [" a small house with a tree, pool, and pond in a green and blue landscape.", " a rusted metal barrel with a yellow and red warning sign and stripe on it."], "sample_ids": ["e22489ba182f45cb81f0a83f22abe9bd", "5a49ad82ef7a4d33badea2261720f518"], "properties": ["house, tree, pool", "rusty, warning, 
metal"], "captions_pred_pc": ["in 15 words or less a black and white image of a square with dots around it", "a black and white drawing of dots on a white background a black and white drawing of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house and a tree in a box royalty free 3d model preview no.3", "a black and white photograph of a barrel"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a white sculpture, resembling a horse and paper plane, on a gray background.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["179b4438edfc4a43a27a83784f38ff4b", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["color, background, white", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["above a black and white image of a sculpture in the shape of a bird", "a black and white image of a circular object on a white background"], "captions_pred_image": ["a 3d printed sculpture of a horse's head on a gray background", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": [" the white text \"ekberkaslan\" with a row of white cubes and a numbered box.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["87ee30d475f34b799c24bf7ef3a7b540", "bded33af34104b9686b845dfd18309a9"], "properties": ["- color is white- shape is cubes- number is 1", "table, staircase, light"], "captions_pred_pc": ["a close up of a black and white striped scarf", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d image of the word ebercaskalan on a white background", "a 3d model of a small table with a staircase"], "question": "which object has a staircase?", "label": 1}, {"captions": ["a 3d white box with an open door and lid.", " a destroyed 
car with rusted, broken metal and torn paper."], "sample_ids": ["4e95f0eca97f48d6af1888a8bacec9f6", "3fe31c3bf5cd4574a8ca02222411a988"], "properties": ["- color is white - shape is box - material is plastic", "metal, rusted, paper"], "captions_pred_pc": ["a black and white square with dots all over it", "a black and white drawing of a person sitting in a chair"], "captions_pred_image": ["a 3d rendering of a white box with an open lid", "a black and white image of a piece of debris on the ground"], "question": "which object is made of metal", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", " a tree with green leaves."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "1a902b67735845198efb1b84f434b580"], "properties": ["texture, spikes, eyes", "leaf, color, green"], "captions_pred_pc": ["a black and white drawing of a flower", "for a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 
3", "a 3d model of a tree with a lot of foliage"], "question": "which entity has green leaves", "label": 1}, {"captions": ["a low poly of a tree with red apples and a green cactus with red dots.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["fd7765e391cd49ccbc72891d90850cdb", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["color, red, apples, tree, cactus, dots", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white illustration of a snowflake on a white background", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a group of geometric shapes", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "a2354f13774340d392fbf33564934aab"], "properties": ["ceiling, light, desks", "building, roof, yellow"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "a black and white image of a cell phone"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d rendering of a machine with a conveyor belt"], "question": "which building has a yellow roof", "label": 1}, {"captions": ["a 3d blue star featuring various text, including \"prchen,\" \"bible chen,\" \"rib chicken,\" and \"birch chen.\"", " a small house with stairs and a roof."], "sample_ids": ["0b712fc68ad44ce8a33592d2b26aac18", "e9305c80010f4e3b9de9789f01a9bee5"], "properties": ["color, shape, text", "roof, stairs, house"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "above a black and white image of a square with dots all over it"], 
"captions_pred_image": ["a white paper airplane flying over a gray background", "a 3d rendering of a podium on a wooden floor"], "question": "which entity has a roof", "label": 1}, {"captions": [" a vibrant city skyline featuring various colored buildings, trees, and skyscrapers.", " of a black and white striped fly in the air."], "sample_ids": ["1a1fb9b0d83845f6b1238fb45e0defff", "77e2b8c9032945248951e06bc7e209e0"], "properties": ["color, skyline, buildings", "black, white, fly"], "captions_pred_pc": ["a black and white illustration of a city skyline", "for a silhouette of a butterfly on a white background"], "captions_pred_image": ["a black and white 3d model of a city skyline", "a black and white photograph of a fly"], "question": "which entity is a fly", "label": 1}, {"captions": [" of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["9b2c93d651c3409096118c5ce5b993f2", "c3a82df41875402285608ef13a55df57"], "properties": ["house, roof, blue", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white illustration of a coffee mug on a white background royalty free illustration", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a small house and barn", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" of a computer with a green screen, keyboard, and white box.", " a green mountain with trees and grass."], "sample_ids": ["fb1e5a04ef4644f98219e1d5d52ab073", "08fb23bdc67b4b0ba5fc64ea5c97e5f7"], "properties": ["screen, keyboard, box", "mountain, grass, tree"], "captions_pred_pc": ["a box made up of many small dots on a white background a box made up of many small dots on a white background royalty free illustration", "in 15 words or less a black ink blot on a white background royalty free 
illustration"], "captions_pred_image": ["a 3d model of a vintage computer royalty-free 3d model preview", "a 3d model of a mountain with snow on it"], "question": "which entity has more grass", "label": 1}, {"captions": [" a diverse town featuring houses, buildings, people, animals, and desert elements.", " a large house with a roof on a platform."], "sample_ids": ["436d6492fa06466680ecc82e5e07a7a0", "cb3e09a301b746918a682a595037c7f7"], "properties": ["house, building, people", "roof, platform, house"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a small town in the middle of a field", "a 3d model of a small house"], "question": "which entity has a roof", "label": 1}, {"captions": ["a 3d wooden sign featuring various texts, including \"resin park,\" \"shabaab corporation,\" and \"johnson sammons.\"", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["34a49861e7744acfb71de471a755e917", "b896a0898efe4059a776193c02132129"], "properties": ["text, material, shape", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white image of a metal bar with text on it", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d rendering of a marble slab with text on it", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a small island with trees, water, and a river.", " a house with a roof and beams."], "sample_ids": ["5a9c593092c04deaa0f17a1c28a79476", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["water, river, island", "roof, beams, house"], "captions_pred_pc": ["in 15 words or less a black and white polka dot pattern on a white background", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 
3d model of a snowy landscape with trees in the foreground royalty free 3d model preview no.2", "a 3d model of a building with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" of a white spiral, earbuds, ceiling light, and silver ring with black and white scissors and design.", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], "sample_ids": ["c69f60b389124ad9b4f81c64ec332054", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["earbuds, light, ring", "building, plane, room"], "captions_pred_pc": ["a black and white drawing of a needle and thread", "a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white illustration of a pair of sunglasses and a pair of scissors next to each other on a white background", "a 3d model of a box with a lot of items inside"], "question": "which entity has a room?", "label": 1}, {"captions": ["a featuring a train, large ship, long metal pipe, boat, black map of kenya, and a black piece of plastic.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["49c5a9d42ba64fb2b681ef583d700b98", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["a train, a ship, a boat", "box, handle, gun"], "captions_pred_pc": ["above a black and white image of a long, curved line on a white background", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d model of a submarine", "a 3d rendering of a metal box with a handle"], "question": "which object has a handle", "label": 1}, {"captions": ["a featuring a tv, chair, desk, laptop, man, couch, computer, and building.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["d4208427217343e6af1b9b4a42a2f730", "c6cef5b8f3a741e4a619e3441bee54d5"], 
"properties": ["tv, chair, laptop", "camera, speaker, ceiling fan"], "captions_pred_pc": ["for 3d illustration of a smartphone with a qr code on the back and a qr code on the front", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a person sitting on a couch", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" a tall glass tower featuring blue, green, and white squares.", " a large white and metal building with a metal roof structure."], "sample_ids": ["405d68eda26c401fbcddc7e3a457c74e", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["color, shape, height", "roof, metal, white"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a black and white photograph of a metal sculpture on a pedestal", "a 3d model of a large white box"], "question": "which building has a metal roof", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " a two-story small apartment building with a roof."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "8d1102e923954604ae7045a7ca14c1f6"], "properties": ["bed, desk, window", "two-story, roof, building"], "captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white pattern of dots in the shape of the letter c on a white background vector illustration of a black and white pattern of dots in the shape of the letter c on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d model of an apartment building royalty free 3d model preview no 2"], "question": "which entity is a building?", "label": 1}, {"captions": [" an old castle in a grassy field.", " of a small island featuring a white lighthouse, a fountain, and a grassy crater."], "sample_ids": 
["26ea562f32d54afe919b73486dbf7d53", "2a30e69498ff4fd1a33c1fb72286f553"], "properties": ["field, grass, castle", "lighthouse, fountain, crater"], "captions_pred_pc": ["above a black and white image of a castle in the middle of a field", "a black beanie with sparkles on a white background"], "captions_pred_image": ["a black and white image of a broken column on a piece of paper", "a black and white image of an object on top of a pedestal"], "question": "which entity has a fountain?", "label": 1}, {"captions": ["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["db19e46828c94b1a8b9a6ca9f673c604", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["a, roof, soup", "box, paper clip, lock"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "above a black and white image of a clock in the shape of a spiral"], "captions_pred_image": ["a 3d model of a bowl and chopsticks on a sheet of paper", "a 3d model of a stapler with a staple in it royalty free 3d model no."], "question": "which entity has a lock?", "label": 1}, {"captions": ["a black motorcycle helmet with a face mask and visor.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["ad6df43a2ce24edfb15f5bb64755ed0d", "c3a82df41875402285608ef13a55df57"], "properties": ["color, black, visor", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white circular shape made up of many small dots on a white background a black and white circular shape made up of many small dots on a white background illustration", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a black motorcycle helmet with a visor on top", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a wall-mounted 
shelf with three compartments and vents.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["77df1bccf94249988927d3dda97ae273", "a17477b445b3443189dad22f768b888b"], "properties": ["compartments, wall-mounted, vents", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white drawing of a boat with dots all over it", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a wall mounted air conditioning unit on a gray background royalty free 3d model no.", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["room, bed, desk", "house, roof, blue"], "captions_pred_pc": ["a black and white drawing of a door", "a black and white illustration of a coffee mug on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a 3d model of a small house and barn"], "question": "which house has a blue roof", "label": 1}, {"captions": [" a set of yellow and black tools, including a magnifying glass, hexagonal keys, and screwdrivers.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["40285a60f32749a8ae38957c7b073fe8", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["color, yellow, black", "roof, trusses, beams"], "captions_pred_pc": ["a set of three black and white keychains on a white background", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a 3d model of a pair of hexagonal keys and a pair of hexagonal scissors royalty free 3d model preview no 3", "a 3d model of the roof of a building"], "question": 
"which entity has a roof", "label": 1}, {"captions": [" a small white gravestone on a grassy field.", "a featuring a bench with flowers, a bridge, a white sculpture, a bicycle with a flower, a giraffe, and a palm tree."], "sample_ids": ["b43779ae94f74bffba4f29863518f506", "12093c89a60941e7884b252bdc05104c"], "properties": ["location, field, gravestone", "giraffe, bench, flower"], "captions_pred_pc": ["a black and white illustration of a whale", "a black and white drawing of a gear on a white background"], "captions_pred_image": ["a 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake", "a 3d model of a sculpture made of sticks"], "question": "which entity has a bench with flowers?", "label": 1}, {"captions": [" a white rock formation with pebbles and ice elements.", "a featuring a tree stump, mossy wood, leaves, and a rock with grass."], "sample_ids": ["94d94f5a75de4bd0a43b08609630876e", "2527cd763a1a43f9870eb65e44e79f7d"], "properties": ["color, shape, texture", "mossy, rock, grass"], "captions_pred_pc": ["a black and white drawing of a pair of shoes on a white background royalty free illustration", "a black and white image of a person on a skateboard"], "captions_pred_image": ["a 3d model of a small rocky island in the 
middle of a body of water", "a 3d model of a piece of wood on a white background"], "question": "which entity is more mossy", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", " a black table with a metal shelf and folding door, featuring a hanging metal shelf from the ceiling."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "c7fdb8d8f32b415cb5d2a9b41dd7d77b"], "properties": ["a knife, blade, handle", "Black, Shelf, Metal"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "a black and white drawing of a spiral staircase"], "captions_pred_image": ["a piece of white plastic on a gray background", "a 3d model of a black shelf with square tiles"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a small island featuring a white lighthouse, a fountain, and a grassy crater.", " a house with wooden framing and trusses."], "sample_ids": ["2a30e69498ff4fd1a33c1fb72286f553", "4501794e257c4a8ba60a94757d8e93a9"], "properties": ["lighthouse, fountain, crater", "frame, trusses, wood"], "captions_pred_pc": ["a black beanie with sparkles on a white background", "a black and white drawing of a window"], "captions_pred_image": ["a black and white image of an object on top of a pedestal", "a 3d model of a house under construction"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a modern office building with a green door, green roof, windows, and blue lights.", " a long row of steel shelves in a warehouse, featuring a suspended scaffolding system."], "sample_ids": ["d6087023095446fbadef1721478373b2", "578fe7a7bd754b889be33aea99cf5050"], "properties": ["door, roof, window", "a, material, steel"], "captions_pred_pc": ["a black and white drawing of a toilet brush", "above a black and white image of a rack with multiple shelves"], "captions_pred_image": ["a 3d model of an apartment building", "a 3d model of a large metal structure"], "question": "which 
entity is made of steel", "label": 1}, {"captions": [" a small white house with a staircase and a window.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["9eb88d17310d42dda9e17883e9922525", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["house, staircase, window", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d rendering of a small room with a staircase", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a white rock-like object, possibly a shell or ice.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["096e42b466ec438d95c5d89a85191534", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["white, rock, shell", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["in one hundred words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a white rock on a gray background", "a 3d model of a tall, slender object in the air"], 
"question": "which object is made of metal", "label": 1}, {"captions": [" a large, multi-floor building with columns, shelves, conveyor table, and a ceiling structure featuring pipes.", " a small house with stairs and a roof."], "sample_ids": ["6d773d2b0ed9437ea2b9b352bd8a5c25", "e9305c80010f4e3b9de9789f01a9bee5"], "properties": ["building, floor, columns", "roof, stairs, house"], "captions_pred_pc": ["in one line a black and white drawing of a dotted pattern on a white background", "above a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d rendering of a large white table with multiple shelves", "a 3d rendering of a podium on a wooden floor"], "question": "which entity has a roof", "label": 1}, {"captions": [" a yellow and white pickup truck with yellow wheels.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["6f6861c416be4a59ac201f8cb019c349", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["color, yellow, white, wheels", "house, pool, balcony"], "captions_pred_pc": ["a car made of dots on a white background a car made of dots on a white background stock illustration \u00a9 iStock/Getty Images", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a pickup truck royalty-free 3d model preview no.2", "a 3d model of a modern house"], "question": "which object has a pool", "label": 1}, {"captions": [" a small black, white, and orange walkie-talkie.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["e098706085a44898abbd549693d12a64", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["color is black, white, orange", "house, roof, wooden"], "captions_pred_pc": ["of a white and black clutch with a flower design", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a walkie-talkie royalty free 3d model preview no 3", "a black and 
white photograph of a birdhouse"], "question": "which object is made of wood", "label": 1}, {"captions": [" an ice cream machine, popsicle with two sticks, and a shelf with a computer monitor.", "a gnome toy wearing a striped hat."], "sample_ids": ["cb840159fea7436d81eb33bdccad3596", "72f220d616de466fa1315c1c9bbb5c4c"], "properties": ["A, a, a", "hat, toy, gnome"], "captions_pred_pc": ["a black and white illustration of a bench", "above a black and white photograph of a crocodile"], "captions_pred_image": ["a 3d rendering of a white and gray wall mounted shelf", "a stuffed gnome wearing a striped hat"], "question": "which toy is a gnome?", "label": 1}, {"captions": ["a green cube with white numbers 3 and 5 on it, and a black and blue dice with the number 2 on it.", "a green cube with white numbers 3 and 5 on it, and a black and blue dice with the number 2 on it."], "sample_ids": ["ec245f2665754c0ca59c69f7db590fe0", "ec245f2665754c0ca59c69f7db590fe0"], "properties": ["Numbers on cube are 3, 5, and 2; Color of cube is green; Color of dice is black and blue", "Numbers on cube are 3, 5, and 2; Color of cube is green; Color of dice is black and blue"], "captions_pred_pc": ["a black and white dotted square on a white background", "a black and white dotted square on a white background"], "captions_pred_image": ["a 3d model of a black cube with the number 3 on it", "a 3d model of a black cube with the number 3 on it"], "question": "which cube has the number 2 on it?", "label": 0}, {"captions": ["a white of a building with columns, stairs, and railings.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["c9ad30f336844b629cb237fa5b0d94f2", "bf18bfd89efd43389781050230467d58"], "properties": ["image, building, stairs", "Lights, number, five"], "captions_pred_pc": ["a black and white image of dots on a white background a black and white image of dots on a white background royalty free illustration", "a black and white illustration of a 
snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a multi-level building with stairs and balconies royalty-free 3d model no.", "a white chandelier with five white shades"], "question": "which entity has more lights", "label": 1}, {"captions": ["yellow metal barricade ", "a 3d white object resembling a sphere, frog, egg, hat, and shell."], "sample_ids": ["d40f286201584313bf014f9cdc533af3", "0a8e0b95d8ce43ee9159ad01d925aad8"], "properties": ["color is yellow, material is metal, height is 0.5 m", "shape is sphere, color is white, material is plastic"], "captions_pred_pc": ["a white barrier on a white background", "a black and white illustration of a sponge in the shape of a sponge sponge black and white illustration of a sponge in the shape of a sponge royalty free illustration"], "captions_pred_image": ["a 3d model of a metal barrier royalty-free 3d model preview no.2", "a 3d sculpture of an apple on a white background"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a chocolate cake with a mushroom on top, displayed on a stand.", " featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot."], "sample_ids": ["728e8c8600584eacae80208bba7eece4", "34ebe81ae93841ca829efd15aee4d8c1"], "properties": ["chocolate, mushroom, cake", "moss, mushroom, grass"], "captions_pred_pc": ["a black and white image of a toilet paper holder", "for a black and white illustration of a cloud on a white background"], "captions_pred_image": ["a mushroom on top of a cake royalty free 3d model preview", "a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a 
cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor"], "question": "which mushroom is in the grass", "label": 1}, {"captions": [" a man in a suit holding a cell phone and a gun.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["c017bfb82db1460cafff604c0808793f", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["a, suit, cell, phone, gun", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a silhouette of a person holding a cell phone", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d sculpture of a man holding a baseball bat in his left hand and a baseball glove in his right hand", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["- color is red, blue, pink", "box, handle, gun"], "captions_pred_pc": ["for a black and white image of an object on a white background", "a black and white square with dots on a white background"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a 3d rendering of a metal box with a handle"], "question": "which object has a handle", "label": 1}, {"captions": [" a toy motorcycle, car, and robot on an orange platform.", " a modern house with a pool, balcony, and a small white box with a hole in it."], 
"sample_ids": ["7407a108e0354925b83b750339bc03df", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["platform, color, orange", "house, pool, balcony"], "captions_pred_pc": ["a black and white illustration of a bicycle", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a motorcycle on a pedestal", "a 3d model of a modern house"], "question": "which object has a pool", "label": 1}, {"captions": ["a 3d white object resembling a knife, sword, and airplane.", " of a yellow metal locker with legs, wheels, and metal brackets."], "sample_ids": ["d88df1cb10da467bb6f77af6aeaa8f86", "e3fde8fe782c41f0b141c9f1b8e13aa5"], "properties": ["shape is cylinder, color is white, material is plastic", "metal, legs, wheels"], "captions_pred_pc": ["of a spike on a white background", "a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and 
white illustration of a black and white"], "captions_pred_image": ["a 3d model of a paper airplane", "a 3d model of an old metal locker"], "question": "which object is made of metal", "label": 1}, {"captions": ["s of a plane, house, pile of rocks, and islands, accompanied by watercolor paintings of a tree, rocks, and a boat in a sand dune.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["8da8da6ccf5f4011a4115977c55d1cb8", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["s, plane, house", "house, roof, blue"], "captions_pred_pc": ["a black and white silhouette of a map", "a black and white illustration of a coffee mug on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a small island in the middle of a body of water", "a 3d model of a small house and barn"], "question": "which house has a blue roof", "label": 1}, {"captions": [" a small building with windows and a roof.", " a house featuring a roof with truss system, framing, insulation, and a ceiling light."], "sample_ids": ["0ef2cac27e364c0687afae7ab5040cc3", "39876e69e3914d99a07e0dc59611c5c0"], "properties": ["roof, windows, building", "roof, truss system, framing"], "captions_pred_pc": ["a black and white square made up of small dots on a white background", "a black and white drawing of a window with dots all over it"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no 3", "a 3d model of the roof of a house"], "question": "which entity has a roof with truss system", "label": 1}, {"captions": [" a table with two bowls and a cup on it.", " a small wooden cabinet with a drawer, doubling as a bedside table."], "sample_ids": ["41842c8d2ebd402da04def3c53c41633", "3755f3c19ae549c4bf708462db1b2581"], "properties": ["a, bowl, cup", "Cabinet, drawer, wood"], "captions_pred_pc": ["a black and white image of two eyes in the sky", "a black and white image of a 
square with dots all over it"], "captions_pred_image": ["a 3d model of a table on a marble floor royalty free 3d model preview no. 3", "a 3d model of a wooden box with a lid"], "question": "which object has a drawer", "label": 1}, {"captions": ["a featuring a staircase, small red and white building, red box, table, and ceiling light.", " of a wrecked plane, ship, and bird on a pile of rocks with grass."], "sample_ids": ["11e2e8ca1f8849e394dfbf532c6d7ae0", "b0c703df20154bbf9fd8707c61137fc5"], "properties": ["a, building, staircase", "plane, ship, bird"], "captions_pred_pc": ["for a black and white photograph of a piece of metal", "a black and white watercolor map of the state of ohio"], "captions_pred_image": ["a 3d model of a staircase in the middle of a floor", "a black and white photograph of a pile of debris on the ground"], "question": "which entity has a plane", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", "3d white playground set with slides and swings, featuring a plane, building, and dragon in the sky."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "e694d53545d449319a64cceb0280c3c6"], "properties": ["room, bed, desk", "3d, slide, swing"], "captions_pred_pc": ["a black and white drawing of a door", "for a 3d model of the letter 'j'"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a 3d model of a playground slide"], "question": "which entity has a slide?", "label": 1}, {"captions": ["a collection featuring a broken egg in a bowl, food wrapped in paper, torn and whole paper pieces, a white paper hat, a skull with a hat, a snowy iceberg, and a piece of cake.", " a broken human skull and stone bowl."], "sample_ids": ["3d7bd392c9a14f4ab7c0aae2cf75b487", "f7f1d8f726cb4ec6bda791aae99a10c2"], "properties": ["hat, food, bowl", "skull, bowl, human"], "captions_pred_pc": ["in 15 words or less a black and white image of dots on a white background", "a black and white 
illustration of a toilet bowl with dots all over it"], "captions_pred_image": ["a 3d model of the earth with a hole cut out of it", "a ceramic bowl with a large crack in the middle"], "question": "which object is not a skull", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a small black house with a green roof, resembling a shed or container."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "bdb8e4c36ccb477890fd6ae569ae305c"], "properties": ["color, white, black, white", "black, roof, green"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white drawing of a square with dots all over it"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of a small black building"], "question": "which object has a green roof", "label": 1}, {"captions": [" of a broken stone wall featuring an angel sculpture.", " a small house with a blue roof."], "sample_ids": ["aae2c42740a04fd68068f5707111d26f", "fa21afd3a99d448cb23fa527a784769c"], "properties": ["image is a sculpture of an angel on a wall", "roof, color, blue"], "captions_pred_pc": ["a black and white image of a toilet paper roll", "a house made of dots on a white background a house made of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a marble sculpture of an angel", "a 3d model of a house with a porch and balcony royalty-free 3d model preview no.2"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" of a snowman with a white umbrella and a white coat.", " tall grass, plants, rocks, and a tree."], "sample_ids": ["bbfff7ef6ab14b30bc7b5a3aa8391f95", "eefed882ed5f4711bc5a76332d9712f3"], "properties": ["color, umbrella, coat", "grass, plants, rocks"], 
"captions_pred_pc": ["a black and white image of a tree made of dots", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a snowflake on a white background", "a 3d model of a group of trees"], "question": "which entity has more grass", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", " a small house with a blue roof, a door, and a pool."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "40c52c2d278345c5b4e8d00a991271dc"], "properties": ["room, bed, desk", "door, roof, pool"], "captions_pred_pc": ["a black and white drawing of a door", "of a black and white photo of a window with fringes"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a 3d model of a small house"], "question": "which house has a door?", "label": 1}, {"captions": [" a large steel building with many columns and a pool.", " a cartoon panda bear wearing a diaper."], "sample_ids": ["2ce649a4152a45bab60d8cafa1dcdeb3", "e2c307d9fa2b4d40b4602537d7f71e24"], "properties": ["building material, pool, steel", "cartoon, bear, diaper"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a 3d model of a teddy bear on a white background 3d model of a teddy bear on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a concrete structure", "a 3d model of a cute panda bear"], "question": "which entity is a cartoon?", "label": 1}, {"captions": [" a small house with stairs and a roof.", " a spiral staircase with a railing and wooden floor in a house."], "sample_ids": ["e9305c80010f4e3b9de9789f01a9bee5", "40921ffd69db479294554d261daf3035"], "properties": ["roof, stairs, house", "floor, railing, staircase"], "captions_pred_pc": ["above a black and white image of a square with dots all over it", "above a black and white image of a computer screen"], "captions_pred_image": ["a 3d rendering of a podium on a wooden floor", "a 3d model of a spiral 
staircase"], "question": "which staircase has a railing", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["color, table, ceiling", "hat, candy, strawberry"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a black and white image of a person wearing a hat"], "question": "which entity has a floating cup?", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["roof truss, insulation, suspended ceiling", "house, tree, hill"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a house in the middle of a field"], "question": "which house has a tree in front", "label": 1}, {"captions": [" a small white building with a floor plan and ceiling light.", " a small island featuring a large building, trees, and a house on a hill, surrounded by a forest."], "sample_ids": ["1df55bb7035941cc9829aa904e2af065", "d557c62e9be741a6b0f6b204d11a9c6f"], "properties": ["floor plan, ceiling light, color", "house, hill, forest"], "captions_pred_pc": ["a line of dots on a white background a line of dots on a white background royalty free illustration", "above a black and white illustration of a small island in the middle of a body of water"], "captions_pred_image": ["a white 
3d model of a house", "a black and white image of a small island in the middle of a body of water"], "question": "which entity has a house on a hill?", "label": 1}, {"captions": [" a small building with stairs and a glass floor, featuring a square table and a black square ceiling light.", " a table with pillar-like yellow and white poles, featuring a wooden structure, large tent, glass roof, and a chandelier with numerous glass tubes."], "sample_ids": ["8aaad713b8834739b008ccf2f3d86cce", "fa06167d83e54b05bdfbeeae2ca7c8a6"], "properties": ["floor, table, light", "table, structure, roof"], "captions_pred_pc": ["above a black and white photograph of a window", "a black and white image of a map with dots"], "captions_pred_image": ["a black and white 3d model of a staircase on a platform", "a 3d model of a building with many pillars"], "question": "which table has a glass roof", "label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", " of a gray ashtray with a cigarette in it."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "c9c7fe9b477a4565a11fd2eaf868c23b"], "properties": ["color, light, jewels", "ashtray, color, gray"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background royalty free illustration", "a black and white illustration of a toilet brush on a white background toilet brush vector illustration on a white background toilet brush vector illustration on a white background"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a 3d model of an ashtray with a cigarette in it"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a white sculpture, resembling a horse and paper plane, on a gray background.", "a 3d white object resembling a sphere, frog, egg, hat, and shell."], "sample_ids": ["179b4438edfc4a43a27a83784f38ff4b", 
"0a8e0b95d8ce43ee9159ad01d925aad8"], "properties": ["color, background, white", "shape is sphere, color is white, material is plastic"], "captions_pred_pc": ["above a black and white image of a sculpture in the shape of a bird", "a black and white illustration of a sponge in the shape of a sponge sponge black and white illustration of a sponge in the shape of a sponge royalty free illustration"], "captions_pred_image": ["a 3d printed sculpture of a horse's head on a gray background", "a 3d sculpture of an apple on a white background"], "question": "which object is whiter", "label": 1}, {"captions": ["a white 3d object featuring black and white patterns, resembling a combination of a dish, smoke detector, cake, bowl, alarm clock, and ceiling light.", "a featuring white and red cubes, and a pink and white chair."], "sample_ids": ["6a8cc820f00a4cfc954d56e2b1f6206a", "f2c44a82ba744ba8b93e9a1c2272c117"], "properties": ["- material is plastic- color is white- shape is cylinder", "color, white, red, pink"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a plate", "a black and white illustration of a house made of dots"], "captions_pred_image": ["a cake with a black and white design on the top of the cake", "a 3d model of a white structure with stairs"], "question": "which entity is a chair?", "label": 1}, {"captions": [" a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead.", " a white building, table, and various piles of paper, including a low-poly object."], "sample_ids": ["a2354f13774340d392fbf33564934aab", "515210fb031f4ec89021ee8ce9e432e9"], "properties": ["building, roof, yellow", "- building is white, table is white, piles of paper are white"], "captions_pred_pc": ["a black and white image of a cell phone", "a black and white drawing of a piece of paper"], "captions_pred_image": ["a 3d rendering of a machine with a conveyor belt", "a 3d model of a snowy landscape"], "question": "which building is 
white", "label": 1}, {"captions": ["a small chicken figurine on a green base.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["a8abd4ab2a6943bfa4ac1e01caa109ef", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["base color, chicken, figurine", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["above a black and white illustration of a fish in the shape of a butterfly", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken 3d model of a chicken", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a cup, bottle, and two metal buckets on a chessboard with a square ceiling light fixture.", "s of a cat, fish, person with a hat, pig with a green hat and swimsuit, and a green and pink bird."], "sample_ids": ["05b5a5da1a0a4c1fa60a9e5edd5c3424", "402601779d1d4146b4cde106dfff1b27"], "properties": ["cup, bottle, chessboard", "s, cat, fish, person, pig, bird"], "captions_pred_pc": ["a black and white 3d shape made up of small dots on a white background", "above a black and white photo of a toy octopus on a white background"], "captions_pred_image": ["a 3d model of a chess set on a checkered board royalty-free 3d model", "a snowflake in 
the air on a cloudy day"], "question": "which entity has a pig in a swimsuit?", "label": 1}, {"captions": [" a modern orange leather swivel recliner chair and ottoman.", " of a yellow metal locker with legs, wheels, and metal brackets."], "sample_ids": ["943ae5d90d654498912d0d3d0114ba35", "e3fde8fe782c41f0b141c9f1b8e13aa5"], "properties": ["color, orange, leather", "metal, legs, wheels"], "captions_pred_pc": ["a black and white illustration of a chair", "a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white"], "captions_pred_image": ["a white leather lounge chair and ottoman", "a 3d model of an old metal locker"], "question": "which object is made of metal", "label": 1}, {"captions": ["s of a skateboard, snowboard, door, and three pairs of shoes, along with a paper mask and a paper with a hole.", " of a torn piece of paper, a hole in 
a wall, and a guitar with a sign on it, accompanied by a piece of metal, wood, and a large rock."], "sample_ids": ["a267d906e4cf4d36bfe841c6cc9e698b", "cc4ccf85d4c1425cb5975b8b5664d38a"], "properties": ["s of, snowboard, shoes, mask", "paper, hole, sign"], "captions_pred_pc": ["above a black and white image of a person standing on a piece of paper", "a silhouette of a map of the state of karnataka, india on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a surfboard flying through the air", "an image of a torn piece of paper in the shape of a map"], "question": "which entity has a hole in it?", "label": 0}, {"captions": [" a small house with stairs and a balcony.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["0fbc5f16d301450c820b1f2158fd4f69", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["balcony, stairs, house", "house, roof, blue"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white illustration of a coffee mug on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building with two floors and a balcony", "a 3d model of a small house and barn"], "question": "which house has a blue roof", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " a house with a flat roof structure."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "abc52d210d71415296730bb00352ce6f"], "properties": ["apse, roof, floor plan", "roof, flat, structure"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white drawing of a window with dots around it"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a house with a roof"], "question": "which entity has a flat roof structure", "label": 1}, {"captions": [" a small house with a blue roof, a door, and a 
pool.", " a small white box with a shelf and a hole in it."], "sample_ids": ["40c52c2d278345c5b4e8d00a991271dc", "d023ae78bc5a436eaba13c5ecdd45c56"], "properties": ["door, roof, pool", "a, hole, shelf"], "captions_pred_pc": ["of a black and white photo of a window with fringes", "a black and white drawing of a dotted square on a white background"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a white box on a gray background"], "question": "which entity has a hole in it", "label": 1}, {"captions": ["a sword with a yellow handle and white blade.", " a small house with a staircase, balcony, and wooden floor."], "sample_ids": ["ee82fbc66d39467b8f34b91493053e66", "e67e211004cb450cbaf8139dd74ba39b"], "properties": ["color, handle, blade", "floor, staircase, balcony"], "captions_pred_pc": ["of a black and white image of a pen on a white background", "a black and white drawing of a wallet"], "captions_pred_image": ["a 3d model of a sword on a gray background", "a 3d model of a bench on a wooden floor"], "question": "which object has a staircase?", "label": 1}, {"captions": ["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", " a small house with a roof and door, resembling a shack or shed."], "sample_ids": ["db19e46828c94b1a8b9a6ca9f673c604", "f1b557775310478893242180defa4d80"], "properties": ["a, roof, soup", "shack, roof, door"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "a black and white illustration of a telephone on a white background"], "captions_pred_image": ["a 3d model of a bowl and chopsticks on a sheet of paper", "a 3d model of a small house in the middle of a field"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a cannon with a wheeled cart.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": 
["bef830ddd37344209fcbf102fca0ef29", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["wheeled, cart, cannon", "lizard, rock, stuffed animal"], "captions_pred_pc": ["above a black and white image of a wheel with two wheels", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a cannon royalty free 3d model", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": [" a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light.", " a house with a roof and beams."], "sample_ids": ["5ea0962b100b4fccb761ed84afe027b5", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["house, table, chair", "roof, beams, house"], "captions_pred_pc": ["above a black and white photograph of an open door", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d rendering of a small white table with a chair", "a 3d model of a building with a roof"], "question": "which house has a roof and beams", "label": 1}, {"captions": [" a small white closet-like structure with a door, resembling a kitchen cabinet or miniature house.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["37954d951eb24f23a4956df5a683bb92", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["door, white, structure", "roof, color, yellow"], "captions_pred_pc": ["a black and white drawing of a house made up of tiny dots", "a black and white drawing of a room"], "captions_pred_image": ["a 3d model of a modern, minimalist bathroom", "a 3d model of a table and chairs on a white background"], "question": "which structure has a yellow roof", "label": 1}, {"captions": ["a 3d minecraft model of the acropolis and a city, featuring a 
desert building and a room with debris.", " a small house on an island with a boat and a bird on a rock."], "sample_ids": ["3cbdd3ff48804f9a8041b890838613ec", "1e56a92a0ddc41e59694bd1ad1656149"], "properties": ["acropolis, city, building", "house, rock, bird"], "captions_pred_pc": ["a black and white drawing of a dotted square on a white background a black and white drawing of a dotted square on a white background royalty free illustration", "a black and white drawing of a boat in the middle of a body of water"], "captions_pred_image": ["a 3d model of the temple of olympian zeus in athens, greece royalty free 3d model preview", "a 3d rendering of a house on a rock"], "question": "which entity has a bird on a rock?", "label": 1}, {"captions": [" of a house with a roof truss, chimney, and suspended ceiling.", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["9401dfc901b2447a9c0eb27da56854d7", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["roof truss, chimney, suspended ceiling", "box, paper clip, lock"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background 
illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a", "above a black and white image of a clock in the shape of a spiral"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a stapler with a staple in it royalty free 3d model no."], "question": "which entity has a lock?", "label": 1}, {"captions": [" a black and green sphere with blue lights.", " a wooden shed with a gray roof."], "sample_ids": ["de9edcb24820415da05370c37bb908c3", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["color, shape, light", "roof, color, gray"], "captions_pred_pc": ["a black and white map of a city with dots on it royalty free illustration", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of the sun in the sky", "a 3d model of a shed with a gray roof"], "question": "which entity has a roof that is the same color as the shed?", "label": 1}, {"captions": ["a 3d low-poly model of a gun with blue lights.", "a featuring a bench with flowers, a bridge, a white sculpture, a bicycle with a flower, a giraffe, and a palm tree."], "sample_ids": ["58cc3b1c08da4081a7ffdb5a00db6473", "12093c89a60941e7884b252bdc05104c"], "properties": ["3d model, color, lights", "giraffe, bench, flower"], "captions_pred_pc": ["above a black and white image of an object on a white background", "a black and white drawing of a gear on a white background"], "captions_pred_image": ["a 3d model of a futuristic weapon", "a 3d model of a sculpture made of sticks"], "question": "which entity has more flowers", "label": 1}, {"captions": [" a house with a yard, trees, bushes, and surrounding buildings.", " a house with a green, wooden-structured roof."], "sample_ids": ["7f8942ef51dd4246993a587a12df168c", 
"d58bb529b1434809a64b62f1b2899c3f"], "properties": ["house, yard, surrounding buildings", "roof, color, green"], "captions_pred_pc": ["a black and white image of a truck on a white background", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a house with a triangular roof"], "question": "which house has a green roof", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", " of a wooden building frame with truss and roof structure."], "sample_ids": ["7907916e8f524df5b16eb566497db83e", "1313f8185cf24f3bbd73ff4e4ddfab3e"], "properties": ["color, roof, tray", "frame, truss, roof"], "captions_pred_pc": ["a black and white image of a metal object", "a black and white image of a ladder on a white background"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d rendering of a bridge over a road"], "question": "which entity has a roof structure", "label": 1}, {"captions": ["a 3d white cube with windows resembling a building.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["4a07a5293f024bb0a353954a056ef626", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["- material is white- color is white- texture is textured", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white image of a square made up of small dots", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a cube", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a white, curved plastic 
object resembling an ear, mouth guard, or hat."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "c3a82df41875402285608ef13a55df57"], "properties": ["color, white, black, white", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" white crocs clogs with blue straps and holes.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["4622e88fd9264b37997671efeb73af85", "4a889132cc444d10bfcbf6c760984416"], "properties": ["color, white, blue", "a, color, white"], "captions_pred_pc": ["of a black and white image of a shoe with holes", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d model of a white clog shoe royalty free 3d model preview no 1", "a 3d model of a desk and chair"], "question": "which entity has a white box with a blue lid?", "label": 1}, {"captions": ["a red circle, red and white arrow, mouse, and child's handwriting.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["3c233f87bf264968a7f0660b9eac9e4a", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["red, mouse, handwriting", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["in 15 words or less a black and white image of a person's hand holding a pencil and drawing on a piece of paper", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a black and white drawing of a person's hand holding a pencil", "a white 3d model of a 
city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", "red swivel chair with a white base ."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "7b78fb47a2684906bcc22ac6e848999a"], "properties": ["base material is wood, color is red, message is welcome to northwich", "color, red, base, white"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "a black and white image of a sphere made up of many small dots on a white background"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a modern chair"], "question": "which object has a white base?", "label": 1}, {"captions": [" a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["b16fb21cda9a4a21a024df749c2304f4", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["roof, ceiling, hole", "house, tree, hill"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d model of a small house and a tree in the foreground", "a 3d model of a house in the middle of a field"], "question": "which house has a tree in front", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["- color is red, blue, pink", "torso, breasts, pattern"], "captions_pred_pc": ["for a black and white image of an object on a white background", "above a black and white illustration of a woman's breasts"], 
"captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a 3d model of a woman's chest"], "question": "which entity is a torso?", "label": 1}, {"captions": [" a white hand, ear bud, teddy bear, earpiece, and utensil.", " of a small white building with stairs and a lid."], "sample_ids": ["1c59287d496f4da6b245a01d25a7e2a4", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["earbud, earpiece, earpiece", "building, stairs, lid"], "captions_pred_pc": ["a black and white drawing of a hand holding a cup", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a white object on a grey background", "a 3d model of a white box on a gray background"], "question": "which entity has a lid?", "label": 1}, {"captions": [" a shattered piece of paper, resembling a broken phone and a flying newspaper.", " a destroyed house and plane amidst a town with buildings."], "sample_ids": ["80d02e2b6ceb4a3a81b6b67d2d98bc0a", "0fd3ddca09194b8f94ef731af3b64a08"], "properties": ["shattered, resembles, broken phone", "house, plane, town"], "captions_pred_pc": ["for black ink brush strokes on a white background", "above a black and white drawing of a piece of paper"], "captions_pred_image": ["a 3d model of a snowboarder in mid-air over a snowy landscape", "a 3d model of a damaged building"], "question": "which entity is a destroyed house?", "label": 1}, {"captions": ["a black table lamp with a black shade and a hat on top.", " a cartoon character wearing a hat, holding a bag, a baseball bat, and a blanket, resembling a toy animal."], "sample_ids": ["31c00c8337de4854a20299d719136cce", "7e7272f3ddc24551905eccb63f3da42e"], "properties": ["color, black, shade, black", "hat, bag, blanket"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a circular object on a white background royalty free illustration", "a black and white illustration of a cell with 
dots"], "captions_pred_image": ["a 3d model of a table lamp royalty free 3d model preview no.2", "a 3d printed sculpture of a baseball player holding a bat"], "question": "which entity has a hat", "label": 1}, {"captions": [" a colorful sunburst with pink, blue, and purple hues.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["46946704a6344eb18718fc5710782f6b", "a17477b445b3443189dad22f768b888b"], "properties": ["color, hue, saturation", "roof, pillar, stairs"], "captions_pred_pc": ["of a black and white photo of a ceiling light fixture", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunbur", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": [" a diverse town featuring houses, buildings, people, animals, and desert elements.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], "sample_ids": ["436d6492fa06466680ecc82e5e07a7a0", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["house, building, people", "building, room, sky"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench", "above 
a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a 3d model of a small town in the middle of a field", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a building", "label": 1}, {"captions": ["s of a plane, house, pile of rocks, and islands, accompanied by watercolor paintings of a tree, rocks, and a boat in a sand dune.", " a small white airplane flying in the air."], "sample_ids": ["8da8da6ccf5f4011a4115977c55d1cb8", "747fb98f64794cdd96486debaf20a02c"], "properties": ["s, plane, house", "airplane, color, white"], "captions_pred_pc": ["a black and white silhouette of a map", "of a black and white image of a clock"], "captions_pred_image": ["a black and white image of a small island in the middle of a body of water", "a 3d model of a small airplane"], "question": "which airplane is white", "label": 1}, {"captions": [" a house with a purple roof and glass block structure.", " a house with a green, wooden-structured roof."], "sample_ids": ["e8ac7de076e54f07ace1a0ead07f6f57", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["roof, color, purple", "roof, color, green"], "captions_pred_pc": ["a black and white image of a fire hydrant on a white background fire hydrant on a white background royalty free illustration", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a 3d model of a building with a glass roof", "a 3d model of a house with a triangular roof"], "question": "which roof is made of wood", "label": 1}, {"captions": ["a gold robot arm with a thank you sign, holding a camera and featuring a light and keychain.", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["3f010c9478164c3db00cab997fdafa40", "09f2cf267e954c958828325067bcc36a"], "properties": ["arm, light, sign", "island, terrain, rocks"], "captions_pred_pc": ["above a black and white image of a skateboard", "above a black and white 
photo of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on", "a black and white image of a piece of dirt on the ground"], "question": "which entity has more rocks", "label": 1}, {"captions": [" a building with wooden and steel structures, featuring stairs, railings, and a ceiling with numerous pipes.", " of a wooden windmill with a red roof."], "sample_ids": ["26c47880756b4876b4f263373c3c5303", "2ad8fca30285483d8b7f602fa078215d"], "properties": ["building, material, steel", "roof, color, red"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a white and black image of a snowflake in the shape of a snowflake"], "captions_pred_image": ["a 3d model of a large structure with multiple levels", "a 3d model of a windmill on a gray background"], "question": "which building has a red roof", "label": 1}, {"captions": [" a rifle with an orange handle, accompanied by a small rake and a knife.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["457043cede9848a6877ed2b5e13022d6", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["a, handle, orange, rake, knife", "island, mountain, grass"], "captions_pred_pc": ["for a black and white image of an object on a white background", "a black and white map of the 
island of malta"], "captions_pred_image": ["a 3d model of a white plastic object on a gray background", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": ["a collection of various bottles, including accutane, vitamin c with a smiley face, activespray, jolt 3d, active kids, pills, and active junior, all containing different types of medicine.", " a spider-like creature with long arms and legs."], "sample_ids": ["bf76d14fcac24579920aa326fa607a17", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["accutane, vitamin c, activespray, active kids, pills, active junior", "arachnid, leg, arm"], "captions_pred_pc": ["a black and white illustration of a microscopic image of a virus on a white background royalty free stock illustration", "a black and white illustration of a spider on a white background"], "captions_pred_image": ["a bottle of activated charcoal pills on a white background royalty free 3d model no.", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": ["a 3d object featuring a flat roof with blue and white pattern, a purple and white bench, a blue and purple striped runner, a bed with blue and purple stripes, blue and white striped paper, a bench with a blue and purple pattern, and a ceiling light fixture with wooden slats.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["28d0cf71ed684dcb87b2c9b2744c3633", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["runner, bed, bench", "house, tree, hill"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a creative black and white drawing of a snowflake on a white background royalty free illustration", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d model of a long, curved structure", "a 3d model of a house in the middle of a field"], "question": "which entity has a 
tree in front", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", " a house featuring a roof with truss system, framing, insulation, and a ceiling light."], "sample_ids": ["7907916e8f524df5b16eb566497db83e", "39876e69e3914d99a07e0dc59611c5c0"], "properties": ["color, roof, tray", "roof, truss system, framing"], "captions_pred_pc": ["a black and white image of a metal object", "a black and white drawing of a window with dots all over it"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of the roof of a house"], "question": "which roof is more complex", "label": 1}, {"captions": ["a featuring a tree stump, mossy wood, leaves, and a rock with grass.", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken window."], "sample_ids": ["2527cd763a1a43f9870eb65e44e79f7d", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["mossy, rock, grass", "room, furniture, window"], "captions_pred_pc": ["a black and white image of a person on a skateboard", "above a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a piece of wood on a white background", "a 3d image of a room with a person in it"], "question": "which entity is more likely to have a window", "label": 1}, {"captions": ["a 3d white cube featuring various text and logos, including \"gypsy stribes,\" \"guinea pig studios,\" \"guillaume pi sodds,\" \"happy studios,\" and \"guillem pie sos.\"", "white ceramic urn with a red lid."], "sample_ids": ["5d08c34bfb2c4c9b9538e24d68761331", "ec15c810a38d4d45a36db910ecb2bcf8"], "properties": ["- material is plastic- color is white- shape is cube", "color, white, lid, red"], "captions_pred_pc": ["of a black and white photo of a person sitting on a bench", "a black and white photograph of a vase"], "captions_pred_image": ["a 3d image of a cube with the word 'stories' written on it", "a white ceramic vase sitting 
on top of a gray surface"], "question": "which object is made of ceramic?", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", " a large house with a roof on a platform."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "cb3e09a301b746918a682a595037c7f7"], "properties": ["a knife, blade, handle", "roof, platform, house"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "a black and white image of a piece of paper"], "captions_pred_image": ["a piece of white plastic on a gray background", "a 3d model of a small house"], "question": "which object has a roof", "label": 1}, {"captions": [" a small green cannon wheel with a handle.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["97d13db38fa24556afa1eef04fc518e6", "b896a0898efe4059a776193c02132129"], "properties": ["color, green, handle", "- material is stone, metal, concrete"], "captions_pred_pc": ["of a small black object on a white background", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a cannon with wheels", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a small white house with a staircase and a window.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["9eb88d17310d42dda9e17883e9922525", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["house, staircase, window", "lizard, rock, stuffed animal"], "captions_pred_pc": ["a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 
3d rendering of a small room with a staircase", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": ["yellow metal barricade ", " a black triangular metal object with a clock and cross on it, and a small machine on top."], "sample_ids": ["d40f286201584313bf014f9cdc533af3", "b198a81dc41c4fde8be3ca51c3b0e676"], "properties": ["color is yellow, material is metal, height is 0.5 m", "metal, cross, clock"], "captions_pred_pc": ["a white barrier on a white background", "above a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a metal barrier royalty-free 3d model preview no.2", "a 3d model of a piece of furniture"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a white rock-like object, possibly a shell or ice.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["096e42b466ec438d95c5d89a85191534", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["white, rock, shell", "lizard, rock, stuffed animal"], "captions_pred_pc": ["in one hundred words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], 
"captions_pred_image": ["a 3d model of a white rock on a gray background", "a 3d model of a vehicle with wheels and tires"], "question": "which object is on a rock", "label": 1}, {"captions": ["a small green and white 3d teapot in the shape of a turtle.", " of a small island featuring a white lighthouse, a fountain, and a grassy crater."], "sample_ids": ["8df8b28138e040a89303e91518b09d59", "2a30e69498ff4fd1a33c1fb72286f553"], "properties": ["shape is turtle, color is green, white", "lighthouse, fountain, crater"], "captions_pred_pc": ["above a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black", "a black beanie with sparkles on a white background"], "captions_pred_image": ["a white ceramic teapot on a gray background", "a black and white image of an object on top of a pedestal"], "question": "which is not a lighthouse", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a small house with a red roof."], 
"sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "085db9059b744673b5623b5338e02196"], "properties": ["roof, green, lawn", "roof, red, house"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "a black and white dotted square on a white background"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of a small shed in the snow"], "question": "which house has a red roof", "label": 1}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["roof, color, blue", "roof, trusses, beams"], "captions_pred_pc": ["of a white lace clutch purse on a white background", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a building with many windows", "a 3d model of a roof structure"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a small bedroom with wooden floors, walls, roof, and shelf.", " a small house with a roof and ceiling-mounted air conditioner."], "sample_ids": ["e602ac60041f4b4f84c044161e478781", "6965067ea9e34357a7af21a2f078fbac"], "properties": ["floor, wall, roof", "roof, air conditioner, house"], "captions_pred_pc": ["above a black and white image of a decorative metal bar", "a black and white illustration of a window"], "captions_pred_image": ["a 3d model of a room with wooden walls and a rug on the floor", "a 3d rendering of a small house with a covered porch"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a toy mushroom character with a white and brown head.", " of a rock formation with a white cliff and a rock."], "sample_ids": ["ae8a73809d4647c09cc82f403e47de1d", "4a25f6dfbea943bca137dacd2f7b984f"], "properties": ["color, head, white and 
brown", "image is rock formation with a white cliff and a rock"], "captions_pred_pc": ["a black and white illustration of a butterfly sitting on a dandelion stock illustration", "above a black and white map of spain on a white background"], "captions_pred_image": ["a 3d model of a gray and white cartoon character", "a black and white image of a rock formation on a gray background"], "question": "which entity is a rock formation?", "label": 1}, {"captions": [" a child's room in a small house with windows.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["88847a6445044bcbab9611e6028a19b9", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["room, house, windows", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["for a black and white drawing of a snowflake", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a child's room with toys and furniture in it royalty free 3d model preview no.1", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": [" a white and yellow metal shelving unit with a steel structure, yellow legs, and suspended ceiling system.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], "sample_ids": ["c1a7d264b34841409009b3d5d39d5b99", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["Steel, Color, Yellow", "building, room, sky"], "captions_pred_pc": ["a black and white illustration of a building", "above a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a 3d model of a table with multiple tables and chairs", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a room?", "label": 1}, {"captions": ["a grayscale of a mountain.", "a featuring a futuristic chair, a black cat with a sword, a person holding an umbrella, and 
a black and blue dragon, airplane, and helicopter."], "sample_ids": ["17a86c6c9be0478c984ef36c7c22edea", "4df70180f2ea400782d2e2de76063894"], "properties": ["color, grayscale, mountain", "color, black, blue"], "captions_pred_pc": ["above a black and white photograph of a galaxy", "a 3d illustration of a girl in a dress 3d illustration of a girl in a dress, isolated on a white background royalty free stock illustration"], "captions_pred_image": ["a 3d model of a mountain range on a white background royalty-free 3d model", "a black and white 3d model of a person holding a sword in the shape of a paper airplane"], "question": "which entity is a black and blue color scheme?", "label": 1}, {"captions": [" a small green chair with a slanted back and white base.", " a gray object featuring an axe, guitar headstock, and head."], "sample_ids": ["7f93c12cbbc74e579d5f0430cfa0010f", "ac5c86f38c8e4570a7eefff0958185cf"], "properties": ["color, white, base, slanted", "Headstock, Guitar, Head"], "captions_pred_pc": ["above a black and white drawing of a chair", "a black and white image of a toothbrush on a white background"], "captions_pred_image": ["a 3d model of a chair", "a 3d model of an axe head"], "question": "which object has a headstock", "label": 1}, {"captions": [" a brick building with a roof structure and roof truss.", " a wooden roof structure with a pink roof."], "sample_ids": ["84e8acad28664a738df69d719df9e263", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["roof, structure, truss", "roof, color, pink"], "captions_pred_pc": ["a black and white polka dots pattern on a white background polka dots pattern on a white background illustration", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of a brick building with a roof", "a 3d model of the roof of a building"], "question": "which roof structure is made of wood", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and 
roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a small island featuring a large building, trees, and a house on a hill, surrounded by a forest."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "d557c62e9be741a6b0f6b204d11a9c6f"], "properties": ["yellow, table, roof", "house, hill, forest"], "captions_pred_pc": ["a black and white drawing of a floor plan", "above a black and white illustration of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a black and white image of a small island in the middle of a body of water"], "question": "which entity has a house on a hill?", "label": 1}, {"captions": ["purple potion bottle with a cork on top.", " a wooden roof structure with a pink roof."], "sample_ids": ["38eabecd9f4345de934ebd27d7504480", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["color, shape, material", "roof, color, pink"], "captions_pred_pc": ["a black and white circular pattern on a white background", "above a black and white image of a metal grate"], "captions_pred_image": ["a white and gray bottle with a spoon inside", "a 3d model of the roof of a building"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a clay sculpture of a man's head on a wooden block.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["52b7f2034d0044bb82cd714d6f7651d5", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["a, material, clay", "a, material, clay"], "captions_pred_pc": ["a black and white image of a man's head made up of dots", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a sculpture of a man's head on a pedestal", "a sculpture of a man's head on a pedestal"], "question": "which entity is a clay sculpture of a man's head on a wooden block?", "label": 0}, {"captions": [" a small white house with stairs, featuring a table, chair, and white 
box, along with a ceiling light.", " of a small white building with stairs and a lid."], "sample_ids": ["5ea0962b100b4fccb761ed84afe027b5", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["house, table, chair", "building, stairs, lid"], "captions_pred_pc": ["above a black and white photograph of an open door", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a small white table with a chair", "a 3d model of a white box on a gray background"], "question": "which building has a lid?", "label": 1}, {"captions": [" a wall-mounted shelf with three compartments and vents.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["77df1bccf94249988927d3dda97ae273", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["compartments, wall-mounted, vents", "Wheels, laptop, robot"], "captions_pred_pc": ["a black and white drawing of a boat with dots all over it", "a black and white drawing of a cell phone"], "captions_pred_image": ["a 3d model of a wall mounted air conditioning unit on a gray background royalty free 3d model no.", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" a yellow triangular kite, resembling a bird with long legs, flying like a glider or airplane.", "a white glass beer mug."], "sample_ids": ["22228e69a1704e58af5c9dd184843508", "1d686cbd3e9a4c629a43088658989286"], "properties": ["yellow, shape, wingspan", "color, white, glass"], "captions_pred_pc": ["above a 3d rendering of a human body on a white background", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a white kite flying in the sky on a gray background", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": [" a pink and white spiral staircase resembling an ice cream cone, with 
striped tower and purple accents.", " a small wooden house with a green roof."], "sample_ids": ["1f98d43acd2543e4821f630160cc7b7d", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["color, shape, tower", "roof, color, green"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of an ice cream cone on a white background royalty-free 3d model preview no.1", "a 3d model of a house with a ladder"], "question": "which entity has a roof that is the color of green", "label": 1}, {"captions": ["a featuring a green marble pillar, a marble box with a blue water drop inside, and a cylinder with a blue shirt on it.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["73c76c4d4fc44b4e8b1a26aea3e0cc79", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["cylinder, marble, pillar", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white envelope with many small dots on it", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a marble bench with a hole in it", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a large house/building structure with a roof.", " of a wooden windmill with a red roof."], "sample_ids": ["82859e4c6d4e4bbea94b6252bef1d398", "2ad8fca30285483d8b7f602fa078215d"], "properties": ["roof, structure, house", "roof, color, red"], "captions_pred_pc": ["a black and white photograph of a metal sculpture", "a white and black image of a snowflake in the shape of a snowflake"], "captions_pred_image": ["a 3d model of a large white structure", "a 3d model of a windmill on a gray background"], "question": "which structure has a roof", "label": 1}, {"captions": [" of 
a green and blue flower with hints of yellow.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["c17ff79bf2334eb4a3f2d8ca4188bf1f", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["color, shape, size", "roof, color, yellow"], "captions_pred_pc": ["a black and white illustration of a pair of hands holding each other in the shape of a heart royalty free illustration", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a black and white photograph of an abstract sculpture", "a 3d model of a house with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" of an egyptian sarcophagus.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["70aa484af2ab44149a608dd81a6ff459", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["sarcophagus, material, wood", "color is white, yellow, plastic"], "captions_pred_pc": ["a black and white circular pattern on a white background", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a black and white photograph of a sphere with egyptian hieroglyphics on it", "a white plastic container with a label on it"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " a small flying airplane or fighter jet."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "2b121bdd0edc468db67f161b840451c6"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "aircraft, jet, fighter"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "above a black and white image of an airplane on a 
white background"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d", "a 3d model of an airplane flying in the air"], "question": "which object is not a fighter jet?", "label": 1}, {"captions": [" a green cucumber wearing a white hat, tie, and white cream, accompanied by a green bottle with a white cap, a cartoon character in a green shirt, and a green and black bomb.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": ["0434eb3aa73e42a5a2570ec9c1a9b96b", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["green, hat, cream", "hat, candy, strawberry"], "captions_pred_pc": ["a black and white image of a pair of earrings", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d model of a vibrator in a grey and white color scheme royalty-free 3d model no.", "a black and white image of a person wearing a hat"], "question": "which entity has a hat", "label": 1}, {"captions": [" a small house on an island with trees, shrubs, a pool, and a lake.", " a small yellow table with a staircase and a square ceiling light fixture."], "sample_ids": ["c8331489fca44685bedfa1bdadf6ccb3", "36f4d2cbd02345c6a77f7345ebde841c"], "properties": ["house, lake, pool", "table, staircase, light"], "captions_pred_pc": ["a black and white image of a pattern on a piece of paper", "a black and white photo of the letter g"], "captions_pred_image": ["a 3d model of a large building", "a 3d 
model of a table with stairs"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue arrow, exosphere label, england label, blue and purple stripes, and a blue flag.", "three white paper windmills and a city model with a nativity scene silhouette."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "fa5ee6165f31465d9d75d046818f4006"], "properties": ["color, temperature, england", "windmills, silhouette, city model"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "a black and white photo of a pair of sunglasses"], "captions_pred_image": ["a 3d model of the earth with a rainbow in the sky", "a 3d model of a cityscape on a white background"], "question": "which entity has a silhouette?", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", "a wooden tower made of stacked blocks with holes in them, resembling a toy castle."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "da8b5d21da9b4037982f29383d60b100"], "properties": ["color, shape, and size", "resembles, toy, castle"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a black and white drawing of a pair of scissors"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a tower made out of blocks"], "question": "which entity is a toy?", "label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", " of a stone fountain with a lion statue, surrounded by an archway inspired by the arch of triumph."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "a72700696c3b44ef8101d1e71e914bc9"], "properties": ["color, light, jewels", "lion, statue, fountain"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background 
royalty free illustration", "a black and white image of a metal object"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a 3d image of a lion statue on a wall"], "question": "which entity is more like a fountain", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["color, white, black, white", "box, handle, gun"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d rendering of a metal box with a handle"], "question": "which object has a handle", "label": 1}, {"captions": [" a small wooden log cabin.", " a large steel building with many columns and a pool."], "sample_ids": ["dc05e20369e640609f0651ce66118669", "2ce649a4152a45bab60d8cafa1dcdeb3"], "properties": ["size, material, log", "building material, pool, steel"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench in a park royalty free illustration", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a log cabin 3d model of a log cabin royalty free 3d model preview no 3", "a 3d model of a concrete structure"], "question": "which building is made of steel", "label": 1}, {"captions": [" of a red and white pokeball with the word \"pichu\" on it, compatible with various modeling and animation software.", "red swivel chair with a white base ."], "sample_ids": ["7d76c44cb9dd4948b8766d83994ca5f3", "7b78fb47a2684906bcc22ac6e848999a"], "properties": ["- material is stl, obj, 
fbx- size is 240px- color is red, white", "color, red, base, white"], "captions_pred_pc": ["a black and white circular pattern on a white background", "a black and white image of a sphere made up of many small dots on a white background"], "captions_pred_image": ["a close-up view of a black ball with a screw in the center", "a 3d model of a modern chair"], "question": "which entity is a chair?", "label": 1}, {"captions": [" of a purple laundry rack with multiple lines resembling wires.", " of a small white building with stairs and a lid."], "sample_ids": ["1f0dce1431a842b8bdb24ac4dd05f386", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["color, shape, material", "building, stairs, lid"], "captions_pred_pc": ["a black and white image of a metal structure", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a wire rack on a white background", "a 3d model of a white box on a gray background"], "question": "which entity is a building?", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", " a small purple plastic chair with holes."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "fe2bf0f8f5c64dd6bac3e2da0d1b89d0"], "properties": ["base material is wood, color is red, message is welcome to northwich", "color, plastic, purple"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "this image may contain clothing apparel accessory purse bag and handbag"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "3d model of a chair royalty free 3d model preview no 3"], "question": "which object is made of plastic", "label": 1}, {"captions": [" of an orange shopping bag with handles and a \"t\" logo.", " a small house with a roof and door, resembling a shack or shed."], "sample_ids": ["392dcf37195e43948cfbffe099082108", 
"f1b557775310478893242180defa4d80"], "properties": ["color, logo, handle", "shack, roof, door"], "captions_pred_pc": ["a black and white image of a purse with a chain", "a black and white illustration of a telephone on a white background"], "captions_pred_image": ["a 3d model of a shopping bag royalty free 3d model no.2", "a 3d model of a small house in the middle of a field"], "question": "which entity has a roof and door", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "f6c6e7a65a3e42dfa431b1d984c72f28"], "properties": ["- color is red, blue, pink", "house, fence, dog"], "captions_pred_pc": ["for a black and white image of an object on a white background", "above a black and white drawing of a bathroom"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a"], "question": "which entity has a fence", "label": 1}, {"captions": [" a 
green and black bat-winged sword with green flames.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["78c5d5bdf2da4b03a467ef9ab2a0c13f", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["color, shape, material", "door, lock, handle"], "captions_pred_pc": ["of a person standing on a white background holding a long metal rod", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of a dragon's wing in the air", "a black and white image of a door with a crack in it"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a small blue plastic box with a lid and handle.", " a small black house with a green roof, resembling a shed or container."], "sample_ids": ["3e5cc957c888491ba1a5773299d3daa9", "bdb8e4c36ccb477890fd6ae569ae305c"], "properties": ["blue, lid, handle", "black, roof, green"], "captions_pred_pc": ["in 15 or fewer words a black and white illustration of a rectangular tray", "a black and white drawing of a square with dots all over it"], "captions_pred_image": ["a 3d rendering of a gray plastic container", "a 3d model of a small black building"], "question": "which entity is a building?", "label": 1}, {"captions": [" of a destroyed building with a watercolor painting of a dilapidated house.", " a small white house with a staircase and a window."], "sample_ids": ["5a33f024faf145ac80cdadcdfef8a797", "9eb88d17310d42dda9e17883e9922525"], "properties": ["image, building, painting", "house, staircase, window"], "captions_pred_pc": ["above a black and white drawing of a building", "a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white photograph of a damaged house", "a 3d rendering of a small room with a staircase"], "question": "which image shows a house with a staircase and a window?", "label": 
1}, {"captions": ["white s of a wall-mounted light, toilet with handle, faucet, and lamp with a light bulb.", " a small white building with stairs and a white table."], "sample_ids": ["92052c493bf141a08b56f30f9c5e2d61", "e30374c614f54fdb90f35b96b071349d"], "properties": ["light, toilet, faucet", "building, stairs, table"], "captions_pred_pc": ["in 15 words or less a 3d illustration of an object made of dots on a white background 3d illustration of an object made of dots on a white background royalty free illustration", "above a black and white drawing of a cat sitting on top of a letter 'e'"], "captions_pred_image": ["a white plastic toilet paper holder on a gray background", "a 3d model of a building with a staircase"], "question": "which entity has stairs", "label": 1}, {"captions": ["s of a plane, house, pile of rocks, and islands, accompanied by watercolor paintings of a tree, rocks, and a boat in a sand dune.", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken window."], "sample_ids": ["8da8da6ccf5f4011a4115977c55d1cb8", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["s, plane, house", "room, furniture, window"], "captions_pred_pc": ["a black and white silhouette of a map", "above a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a black and white image of a small island in the middle of a body of water", "a 3d image of a room with a person in it"], "question": "which entity has a damaged window", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " of a construction site featuring a concrete wall, tools, equipment, wooden bench, fence, chair, and a graffiti-covered green wheelbarrow."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "766a075b4760433bafdbe88bee546796"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "graffiti, wall, 
wheelbarrow"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "for a black and white image of a bowl on a stand"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d", "a 3d model of a wall with graffiti on it"], "question": "which entity has a wheelbarrow?", "label": 1}, {"captions": [" a stack of books, a pile of paper, and a lamp with a black and white shade.", " a house with a green, wooden-structured roof."], "sample_ids": ["6a06b505bcb34026a07ac15931f9f6f3", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["books, paper, lamp", "roof, color, green"], "captions_pred_pc": ["a black and white silhouette of a map of the state of new york", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a black and white photograph of a toilet paper holder", "a 3d model of a house with a triangular roof"], "question": "which object is made of wood", "label": 1}, {"captions": [" a small house on an island with trees, shrubs, a pool, and a lake.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["c8331489fca44685bedfa1bdadf6ccb3", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["house, lake, pool", "house, fence, playground"], "captions_pred_pc": ["a black and white image of a pattern on a piece of paper", "above a black and white drawing of an industrial 
machine"], "captions_pred_image": ["a 3d model of a large building", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" a red circular object with a checkered pattern, resembling a round pillow or bed cover.", " a futuristic, small spaceship with two propellers, resembling a jet fighter."], "sample_ids": ["9cf9fb6d07084488892422a5a5be00ef", "832a022cdcc74763b0571e04af4e592b"], "properties": ["pattern, color, shape", "resembles a jet fighter, futuristic, small"], "captions_pred_pc": ["a black circle on a white background", "a black and white illustration of a person's hand holding a pencil"], "captions_pred_image": ["a 3d model of a round cushion royalty-free 3d model preview", "a lego model of a futuristic airplane"], "question": "which entity is a spaceship?", "label": 1}, {"captions": ["a 3d wooden pole with a metal spear handle.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["1d5cf234576e41f0ba209c5e19d2db47", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["- material is wood, metal, metal", "box, handle, gun"], "captions_pred_pc": ["of a black and white illustration of a vase on a pedestal", "a black and white square with dots on a white background"], "captions_pred_image": ["a black and white image of an old-fashioned pitchfork", "a 3d rendering of a metal box with a handle"], "question": "which object has a handle", "label": 1}, {"captions": ["a white container with a lid, labeled \"real vanilla,\" containing white powder, resembling a yogurt or raw ice cream container with a logo.", " a house with a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["ecd1d75076b641b382b670987e8a55ab", "c8936ace72954650b4e2d84246964849"], "properties": ["container, powder, white", "roof, color, pink"], "captions_pred_pc": ["in 15 words a black and white image of dots on a white background a black and white image of dots on a white background royalty free 
illustration", "a black and white drawing of a toilet"], "captions_pred_image": ["a clear plastic container with a white label on it", "a 3d model of a house with a roof"], "question": "which entity has a roof that is the color of pink", "label": 1}, {"captions": ["a 3d white object featuring stacked racks, toy train, blocks, plastic pipes, lego pieces, and clothes hangers.", " a long row of steel shelves in a warehouse, featuring a suspended scaffolding system."], "sample_ids": ["c9c786f133f54d5e8d99bfc1a588df41", "578fe7a7bd754b889be33aea99cf5050"], "properties": ["stacked, racks, toy train", "a, material, steel"], "captions_pred_pc": ["a black and white photo of a person standing in front of a white wall", "above a black and white image of a rack with multiple shelves"], "captions_pred_image": ["a 3d model of a ship's propeller on a white background", "a 3d model of a large metal structure"], "question": "which object is made of steel", "label": 1}, {"captions": [" a house with a pink roof, truss, and a square white ceiling lamp.", " a house featuring a pitched roof structure with brick detailing."], "sample_ids": ["91b2e9e4660946f5b4808a18b5323b69", "5fbd274f897b44fcafa02ee84228debf"], "properties": ["roof, truss, lamp", "structure, roof, pitch"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white illustration of a square with a lot of dots on it"], "captions_pred_image": ["a 3d model of a house with a metal roof", "a 3d model of the roof of a house"], "question": "which entity has a pitched roof structure?", "label": 1}, {"captions": [" of a white sculpture, resembling a horse and paper plane, on a gray background.", " of a small wooden house with two roofs."], "sample_ids": ["179b4438edfc4a43a27a83784f38ff4b", "30fc23ae4edb42609e30e029dede54bd"], "properties": ["color, background, white", "house, roof, wooden"], "captions_pred_pc": ["above a black and white image of a sculpture in the shape of a bird", "of a 
pair of stainless steel screws on a white background"], "captions_pred_image": ["a 3d printed sculpture of a horse's head on a gray background", "a 3d model of a small barn"], "question": "which entity has a wooden roof", "label": 1}, {"captions": [" a purple and green sphere.", "a victor calculator with a black plastic cover and wall-mounted design."], "sample_ids": ["906b2d1219804f4f9e57d4f6cfd47a83", "88ffa01f4fc34a8cb3e2a659e9e26125"], "properties": ["color, shape, size", "cover, black, plastic"], "captions_pred_pc": ["in 15 words or less a black and white image of a sphere in the shape of a light bulb royalty free illustration", "of a black and white image of a skateboard"], "captions_pred_image": ["a 3d model of a ball with a face on it", "a victor calculator on a white background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["color, table, ceiling", "house, roof, blue"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "a black and white illustration of a coffee mug on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d model of a small house and barn"], "question": "which structure has a blue roof", "label": 1}, {"captions": [" of a house featuring a wooden roof structure with trusses and beams.", " a spider-like creature with long arms and legs."], "sample_ids": ["8cd3f5ff0fc041eca9a480faa6739480", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["roof, trusses, beams", "arachnid, leg, arm"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a window", "a black and white illustration of a spider on 
a white background"], "captions_pred_image": ["a 3d model of a roof structure", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": [" of a green box with four compartments", " of a white chest of drawers with legs."], "sample_ids": ["85599667bbcf4d7aaeaa11251be6280b", "f00dfa8b5e7e4fc6bbf97d718b66f390"], "properties": ["color, shape, number", "chest of drawers, legs, white"], "captions_pred_pc": ["a group of black dots on a white background, arranged in the shape of a sofa", "of a black and white leopard print rug"], "captions_pred_image": ["a 3d rendering of a black box with four compartments", "a 3d rendering of a white dresser"], "question": "which object has legs", "label": 1}, {"captions": [" a small house with a red roof.", " a wooden roof structure with a pink roof."], "sample_ids": ["085db9059b744673b5623b5338e02196", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["roof, red, house", "roof, color, pink"], "captions_pred_pc": ["a black and white dotted square on a white background", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of a small shed in the snow", "a 3d model of the roof of a building"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a building with wooden and steel structures, featuring stairs, railings, and a ceiling with numerous pipes.", " of a small white building with stairs and a lid."], "sample_ids": ["26c47880756b4876b4f263373c3c5303", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["building, material, steel", "building, stairs, lid"], "captions_pred_pc": ["a black and white drawing of a floor plan", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a large structure with multiple levels", "a 3d model of a white box on a gray background"], "question": "which building has a lid?", "label": 1}, 
{"captions": ["a wooden tower made of stacked blocks with holes in them, resembling a toy castle.", "a 3d wooden toy castle with red and yellow towers, a red roof, and a man inside."], "sample_ids": ["da8b5d21da9b4037982f29383d60b100", "311f6655ed854899b07ea10f3613ef7a"], "properties": ["resembles, toy, castle", "a, color, red"], "captions_pred_pc": ["a black and white drawing of a pair of scissors", "a black and white drawing of a wallet on a white background"], "captions_pred_image": ["a 3d model of a tower made out of blocks", "a 3d model of a castle with two towers"], "question": "which toy castle has a red roof", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a large house/building structure with a roof."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "82859e4c6d4e4bbea94b6252bef1d398"], "properties": ["roof, green, lawn", "roof, structure, house"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "a black and white photograph of a metal sculpture"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of a large white structure"], "question": "which entity has a roof", "label": 1}, {"captions": ["a featuring a small boat, a rock with a hole, and blue water.", " a multicolored cube representing a protein, featuring pink, yellow, red, and green hues."], "sample_ids": ["7ccdffc0d6404e8d9144260255ea0c5c", "ee7c3113f2754f9cbe8980b1b7cc4eff"], "properties": ["water, boat, rock", "color, shape, color"], "captions_pred_pc": ["a black and white illustration of a surfboard", "a black and white pattern of small dots on a white background a black and white pattern of small dots on a white background illustration"], "captions_pred_image": ["a 3d image of an animal laying on the ground", "a 3d model of a piece of fabric"], "question": "which entity is a cube?", "label": 1}, {"captions": [" a castle on an island with a small 
floating house, trees, and clouds.", " a potted plant on a wooden table or shelf."], "sample_ids": ["c4c09479570943e2845fbd4c6a450568", "1f99b86478764fa7abd65785a53ebbe8"], "properties": ["castle, island, house", "potted plant, table, shelf"], "captions_pred_pc": ["above a black and white illustration of a group of dots in the shape of a circle", "a black and white illustration of a plant in a vase on a white background vector illustration of a black and white illustration of a plant in a vase on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small house on an island", "a black and white image of a plant in a pot"], "question": "which object is on a table?", "label": 1}, {"captions": [" of a yellow metal locker with legs, wheels, and metal brackets.", " a black triangular metal object with a clock and cross on it, and a small machine on top."], "sample_ids": ["e3fde8fe782c41f0b141c9f1b8e13aa5", "b198a81dc41c4fde8be3ca51c3b0e676"], "properties": ["metal, legs, wheels", "metal, cross, clock"], "captions_pred_pc": ["a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black 
and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white", "above a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of an old metal locker", "a 3d model of a piece of furniture"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse.", " a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog."], "sample_ids": ["9b2c93d651c3409096118c5ce5b993f2", "f6c6e7a65a3e42dfa431b1d984c72f28"], "properties": ["house, roof, blue", "house, fence, dog"], "captions_pred_pc": ["a black and white illustration of a coffee mug on a white background royalty free illustration", "above a black and white drawing of a bathroom"], "captions_pred_image": ["a 3d model of a small house and barn", "a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a"], "question": "which house has a 
fence?", "label": 1}, {"captions": [" a red and brown rock formation with a broken tree branch.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["c29d48d320c04ed1bf5aafe0a3df3d78", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["color, shape, texture", "box, handle, gun"], "captions_pred_pc": ["a black and white silhouette of an island on a white background", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d image of a rock formation on a snowy surface", "a 3d rendering of a metal box with a handle"], "question": "which object has a handle", "label": 1}, {"captions": [" of a white, labeled stone resembling a block of ice, bar of soap, or rock.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["44723e86bdd14071b0544745879604de", "7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["resembles, bar, soap", "water, boat, rock"], "captions_pred_pc": ["a black and white image of a piece of paper with dots on it", "a black and white illustration of a surfboard"], "captions_pred_image": ["a 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble on a white 
background 3d rendering of a piece of marble on a white background 3d rendering of a piece of marble", "a 3d image of an animal laying on the ground"], "question": "which entity has a boat?", "label": 1}, {"captions": [" of a metal tool with a yellow handle, a laptop, and a ceiling light fixture.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["b714bf13e9e54acb867c2c1b3ccf8ae8", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["metal, laptop, light fixture", "hat, skull, bread"], "captions_pred_pc": ["for a black and white image of a corner shelf", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d model of a telescope on a stand", "a black and white image of a stone sculpture"], "question": "which entity has a skull?", "label": 1}, {"captions": [" of a white sheet of paper or box on a gray background.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["7b0c1e02d9b14f2fae4f1f7040661cc7", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["color, white, background, gray", "roof, color, yellow"], "captions_pred_pc": ["above a black and white photograph of an object", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a paper airplane", "a 3d model of a house with a roof"], "question": "which entity has a yellow roof", "label": 1}, {"captions": [" a white rock with green grass and moss on it.", "a white 3d object featuring black and white patterns, resembling a combination of a dish, smoke detector, cake, bowl, alarm clock, and ceiling light."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "6a8cc820f00a4cfc954d56e2b1f6206a"], "properties": ["color, grass, moss", "- material is plastic- color is white- shape is cylinder"], 
"captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "in 15 words or less a black and white drawing of a plate"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a cake with a black and white design on the top of the cake"], "question": "which object is whiter", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", " of a small wooden house with a roof."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "f5904a9d87ff4fa688146c18c1f27fec"], "properties": ["frame, roof, trusses", "roof, house, wooden"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white drawing of a house with dots"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a small house"], "question": "which house has a roof", "label": 1}, {"captions": [" of a human bone", " a large orange keg with a white lid."], "sample_ids": ["371bc8ded342446d87f975c3df8e0d4f", "cf24eea70b4f4067b36583924a82cc35"], "properties": ["a, b, c", "color, lid, orange"], "captions_pred_pc": ["a black and white photo of a bone on a white background", "a black and white circular pattern of dots on a white background a black and white circular pattern of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a human femur bone on a gray background", "a gray keg with a white lid sits on top of a gray surface"], "question": "which entity is not orange?", "label": 0}, {"captions": [" a stone arrowhead with blue crystals and ice-like features.", "red heart-shaped object."], "sample_ids": ["5f8c7eda0f464019a4acea243114555d", "a848538c7e4249a4af8e86c477193fa1"], "properties": ["- material is stone - color is blue - shape is arrowhead", "shape, heart, red"], "captions_pred_pc": ["above a black and white drawing of an arrow", "a black and white image of a skateboard with dots"], "captions_pred_image": ["a 3d model of a 
large piece of ice on a white background royalty free 3d model preview no 3", "a 3d model of a heart shaped object"], "question": "which object is red", "label": 1}, {"captions": [" a small house with a tree and a rock.", " a small house with a tree, pool, and pond in a green and blue landscape."], "sample_ids": ["9dc392a7f6e444e5bfb720684d6f864a", "e22489ba182f45cb81f0a83f22abe9bd"], "properties": ["house, tree, rock", "house, tree, pool"], "captions_pred_pc": ["in 15 words or less the image depicts a white square on a white background with black dots all over the square stock illustration", "in 15 words or less a black and white image of a square with dots around it"], "captions_pred_image": ["a 3d model of a small house with a tree in front of it", "a 3d model of a house and a tree in a box royalty free 3d model preview no.3"], "question": "which house has a pool", "label": 1}, {"captions": ["a gray background featuring a white line in the middle.", " a white rocking chair with a curved backrest."], "sample_ids": ["47f89f92bef14b7193d0ffa3934f6977", "ee0deb90abf943b6894cd5ded1331213"], "properties": ["color, line, gray", "backrest, curved, yes"], "captions_pred_pc": ["above a black and white image of a piece of furniture", "a black and white illustration of a spiral pattern on a white background a black and white illustration of a spiral pattern on a white background royalty free illustration"], "captions_pred_image": ["an airplane flying in the sky with the sun shining behind it", "a 3d model of a white chair royalty free 3d model no. 
3"], "question": "which object has a curved backrest", "label": 1}, {"captions": ["a featuring a plane, a small plane, a bird, and a dragonfly all flying in the air.", " a wooden shed with a gray roof."], "sample_ids": ["f39783d05dec49e49482c407d656e0f7", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["a, plane, small plane", "roof, color, gray"], "captions_pred_pc": ["of a black and white photo of an airplane propeller", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d printed model of a fighter plane in the air", "a 3d model of a shed with a gray roof"], "question": "which object has a roof that is the color of gray", "label": 1}, {"captions": ["a white 3d-printed skull model.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["10fb0de0bec64ec8a424e0b404670ba5", "b896a0898efe4059a776193c02132129"], "properties": ["color, white, model", "- material is stone, metal, concrete"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a fish", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of an animal's head on a gray background", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a small building with windows and a roof.", " a white and blue building with a black roof."], "sample_ids": ["0ef2cac27e364c0687afae7ab5040cc3", "c893118316ee43e18322e5964b2806c5"], "properties": ["roof, windows, building", "color, white, blue, roof, black"], "captions_pred_pc": ["a black and white square made up of small dots on a white background", "a black and white illustration of a person standing on top of a hill made up of tiny dots"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d 
model preview no 3", "a 3d model of a white building on a gray background royalty free 3d model no."], "question": "which building has a roof that is black?", "label": 1}, {"captions": [" a two-story small apartment building with a roof.", " a large, black and white circular building, resembling a stadium or ring structure."], "sample_ids": ["8d1102e923954604ae7045a7ca14c1f6", "67f46bb0048244c687a58d1017a08f6b"], "properties": ["two-story, roof, building", "building, color, black and white"], "captions_pred_pc": ["a black and white pattern of dots in the shape of the letter c on a white background vector illustration of a black and white pattern of dots in the shape of the letter c on a white background royalty free illustration", "the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated 
on a white background illustration of the letter c isolated on a white background illustr"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no 2", "a 3d model of a circular fence with black and white stripes"], "question": "which building is black and white", "label": 1}, {"captions": [" a wooden desk with drawers and a green top.", " of a wooden table with metal legs and frame."], "sample_ids": ["ae41fa78e18748ab89571113754ea59a", "d6d6f13cda4e485fbed0dcd19b9c9314"], "properties": ["top, color, wood", "metal, legs, frame"], "captions_pred_pc": ["a black and white drawing of a fireplace", "a black and white image of a patterned rug"], "captions_pred_image": ["a 3d model of a desk royalty free 3d model preview no 3", "a 3d rendering of a white wooden table"], "question": "which table has metal legs and frame", "label": 1}, {"captions": [" a small building with a school, house, soccer field, and white ceiling light, featuring a white and green paper clip.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["97f487941d26472294e005fa97c403be", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["color, white, green", "house, tree, hill"], "captions_pred_pc": ["for a black and white drawing of a computer keyboard", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d model of an office building", "a 3d model of a house in the middle of a field"], "question": "which entity has a tree in front", "label": 1}, {"captions": [" a black castle with stairs and a door.", " a wooden staircase with a door and square ceiling panel."], "sample_ids": ["036d34dec6274f6a99d8f9689d19a77d", "3fe7e366bf924a00bf4b06ded35fd392"], "properties": ["door, stairs, color", "door, panel, staircase"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "above a black and white photograph of a black and white photograph of a black and white photograph of a black 
and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black"], "captions_pred_image": ["a 3d model of a medieval castle on a white background royalty free 3d model", "a 3d model of a staircase on a wooden floor"], "question": "which staircase has a door", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["color, white, black, white", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white image of a toothbrush on a white 
background", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of a woman's chest"], "question": "which entity has a pattern", "label": 1}, {"captions": [" a small house with a roof.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["0d2246e433ce4066b76489f17ba8d694", "6b745457e06840119058883b35f78f58"], "properties": ["roof, house, small", "roof, color, blue"], "captions_pred_pc": ["a black and white square made up of small dots on a white background", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a house with a triangular roof", "a 3d model of a house with a steeple on top"], "question": "which house has a blue roof", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a house with a wooden-framed roof structure."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["roof truss, insulation, suspended ceiling", "roof, material, wood"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a building with a roof"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a white hospital operating room with blue containers and medical equipment.", " a building with yellow lines."], "sample_ids": ["a10f3196831647bc8842eacac640047d", "f18e34286cf54876874f55ecc9018492"], "properties": ["color, white, containers", "color, yellow, lines"], "captions_pred_pc": ["a black and white illustration of the letter 'f' made up of tiny dots", "a black and white drawing of a map"], "captions_pred_image": ["a 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model 
of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room", "a drawing of an airplane flying over a city"], "question": "which entity has more lines", "label": 1}, {"captions": ["s of a cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, and cow.", " a small village featuring houses, trees, and a winding road."], "sample_ids": ["7adf9de5fb734455a3a3a7f084e3d628", "7acf46c0265d4e39b97ac084852abde8"], "properties": ["cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, cow", "houses, trees, road"], "captions_pred_pc": ["a black and white image of a flying saucer", "in 15 words or less a black and white photo of a mountain landscape"], "captions_pred_image": ["a black and white image of a fighter plane flying upside down", "a black and white photograph of a small town"], "question": "which entity has a road?", "label": 1}, {"captions": [" a small white building featuring a room with a door and a white shelf.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["d7b78fa9a6b64f6095b881bc619b04fe", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["room, door, shelf", "roof, trusses, beams"], "captions_pred_pc": ["above a black and white 
illustration of a person standing in front of a door", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d model of an empty room", "a 3d model of a roof structure"], "question": "which entity has a roof", "label": 1}, {"captions": [" a shattered piece of paper, resembling a broken phone and a flying newspaper.", " an ice cream machine, popsicle with two sticks, and a shelf with a computer monitor."], "sample_ids": ["80d02e2b6ceb4a3a81b6b67d2d98bc0a", "cb840159fea7436d81eb33bdccad3596"], "properties": ["shattered, resembles, broken phone", "A, a, a"], "captions_pred_pc": ["for black ink brush strokes on a white background", "a black and white illustration of a bench"], "captions_pred_image": ["a 3d model of a snowboarder in mid-air over a snowy landscape", "a 3d rendering of a white and gray wall mounted shelf"], "question": "which entity is not a shattered piece of paper?", "label": 1}, {"captions": [" of a red and white pokeball with the word \"pichu\" on it, compatible with various modeling and animation software.", " a gray, metal pillar/cylinder."], "sample_ids": ["7d76c44cb9dd4948b8766d83994ca5f3", "11391e6bab574dc0be8f2440fbc3b724"], "properties": ["- material is stl, obj, fbx- size is 240px- color is red, white", "color is gray, material is metal, shape is cylinder"], "captions_pred_pc": ["a black and white circular pattern on a white background", "of a black candle on a white background"], "captions_pred_image": ["a close-up view of a black ball with a screw in the center", "a 3d model of a candlestick"], "question": "which object is made of metal", "label": 1}, {"captions": [" a small black box on a purple square.", " a white motorcycle with wings."], "sample_ids": ["75f209e543c046669099190953616acc", "7e684a7c012c4fd0ac91844f22457640"], "properties": ["color, shape, size", "color, white, wings"], "captions_pred_pc": ["a black and white image of the letter l", "a black and white image of a pair of sunglasses"], 
"captions_pred_image": ["a 3d model of a small black box sitting on top of a gray surface", "a 3d model of a motorcycle on a white background"], "question": "which object is white", "label": 1}, {"captions": [" of a cup, bottle, and two metal buckets on a chessboard with a square ceiling light fixture.", " a wooden shed with a gray roof."], "sample_ids": ["05b5a5da1a0a4c1fa60a9e5edd5c3424", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["cup, bottle, chessboard", "roof, color, gray"], "captions_pred_pc": ["a black and white 3d shape made up of small dots on a white background", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a chess set on a checkered board royalty-free 3d model", "a 3d model of a shed with a gray roof"], "question": "which object has a roof", "label": 1}, {"captions": ["a 3d red toy character with glasses and a black hat.", "a low-poly of a white, shattered sphere."], "sample_ids": ["335049efca5c45c4b89f594b8366b3cc", "94119660e7054fc5b7baa68a4e39968c"], "properties": ["color, hat, glasses", "sphere, color, texture"], "captions_pred_pc": ["of a sphere made of dots on a white background a sphere made of dots on a white background vector illustration of a sphere made of dots on a white background royalty free illustration", "a black and white illustration of a dandelion on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white 
background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white"], "captions_pred_image": ["the wizard of oz 3d model royalty free 3d model preview no 3", "a 3d model of a cracked egg"], "question": "which entity is a sphere?", "label": 1}, {"captions": [" a house with wooden framing and trusses.", " a house with a wooden-framed roof structure."], "sample_ids": ["4501794e257c4a8ba60a94757d8e93a9", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["frame, trusses, wood", "roof, material, wood"], "captions_pred_pc": ["a black and white drawing of a window", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a building with a roof"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": ["a featuring a small boat, a rock with a hole, and blue water.", "a 3d object featuring a white tray with a decorative pattern, a silver tray with a bird, a laptop, a long knife, and a metal bar."], "sample_ids": ["7ccdffc0d6404e8d9144260255ea0c5c", "b1099ba41d9f4af19d1a91761bb6074c"], "properties": ["water, boat, rock", "Object, Tray, Tray"], "captions_pred_pc": ["a black and white illustration of a surfboard", "above a black and white image of a piece of paper"], "captions_pred_image": ["a 3d image of an animal laying on the ground", "a 3d image of a white tray with an intricate design"], "question": "which object is not a tray?", "label": 0}, {"captions": [" a child's room in a 
small house with windows.", " a molecule with two pink and blue spheres."], "sample_ids": ["88847a6445044bcbab9611e6028a19b9", "926fb2e23c56440a86287e98c1440336"], "properties": ["room, house, windows", "color, shape, size"], "captions_pred_pc": ["for a black and white drawing of a snowflake", "a molecule with the word 'rono' written in black ink on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a child's room with toys and furniture in it royalty free 3d model preview no.1", "a 3d model of three spheres on a white background"], "question": "which entity is a molecule?", "label": 1}, {"captions": [" a black and white box-like object with various interpretations, such as a coffee table, building, book, and ceiling fixture.", " of a white rock-like object, possibly a shell or ice."], "sample_ids": ["404d7e2cd8894c31bdda02d2b3196464", "096e42b466ec438d95c5d89a85191534"], "properties": ["black, white, coffee table", "white, rock, shell"], "captions_pred_pc": ["a black and white drawing of a square with dots on it", "in one hundred words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words"], "captions_pred_image": ["a black and white 3d model of a building", "a 3d model of a white rock on a gray background"], "question": "which object is whiter", "label": 1}, {"captions": [" of a metal plaque featuring a picture of a person.", " of a wooden windmill with a red 
roof."], "sample_ids": ["b9aca92461fc41f89c32dedbfab62be4", "2ad8fca30285483d8b7f602fa078215d"], "properties": ["picture, plaque, person", "roof, color, red"], "captions_pred_pc": ["a long black line on a white background royalty free illustration", "a white and black image of a snowflake in the shape of a snowflake"], "captions_pred_image": ["a black and white photo of a plaque on a wall", "a 3d model of a windmill on a gray background"], "question": "which entity has a roof that is the color of red", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a small white house with windows and a black lid."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "4b40af369c1149949f5ccb68becd8430"], "properties": ["color, shape, and size", "white, windows, lid"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "above a black and white image of dots on a white background"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a white house with three windows"], "question": "which entity is a house?", "label": 1}, {"captions": ["a gold robot arm with a thank you sign, holding a camera and featuring a light and keychain.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["3f010c9478164c3db00cab997fdafa40", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["arm, light, sign", "camera, speaker, ceiling fan"], "captions_pred_pc": ["above a black and white image of a skateboard", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a 
pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on a pedestal 3d model of a robotic arm on", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" of a white chair with arms and legs.", " a four-legged metal workbench with shelves."], "sample_ids": ["61ec56afad7a45deb99ccf1ab1bd2d73", "e93b633d477942d9b79ef8ab566473d6"], "properties": ["Arms, Legs, Color", "Four legs, Metal, Shelf"], "captions_pred_pc": ["of a 3d rendering of a chair in the style of the 1920s and 1930s", "for a black and white illustration of a cross"], "captions_pred_image": ["a 3d model of a white outdoor chair royalty free preview no 3", "a 3d model of a table with four legs"], "question": "which object has four legs", "label": 1}, {"captions": [" a small black house with a green roof, resembling a shed or container.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["bdb8e4c36ccb477890fd6ae569ae305c", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["black, roof, green", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white drawing of a square with dots all over it", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a small black building", "a white 3d model of a city skyline"], "question": "which image is a shelf?", "label": 1}, {"captions": [" a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond.", " a wooden roof structure with a pink roof."], "sample_ids": ["a452d5381dad4dc09f5ebe10635ae5fe", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["house, roof, green", "roof, 
color, pink"], "captions_pred_pc": ["above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of a building with a black roof", "a 3d model of the roof of a building"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a small white house with stairs and a wall-mounted shelf.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["10c4ba5b0db4490db9c00c21c94cb41f", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["house, color, white", "a room, a cake, a table"], "captions_pred_pc": ["above a black and white drawing of a bench", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a 3d model of a small white 
building", "a 3d rendering of a white room with various items in it"], "question": "which entity has a room with various objects", "label": 1}, {"captions": ["a small yellow and black helicopter flying in the air.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["7qxP6dQ5nNuaG8d0vswXXKnd0vq", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["color, yellow, black", "roof, trusses, beams"], "captions_pred_pc": ["a black and white illustration of an airplane", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a black and white helicopter flying in the air", "a 3d model of the roof of a building"], "question": "which entity has a roof", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", "a 3d white axe, hammer, and spoon."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "96d127abd21049689918e671ec613ef8"], "properties": ["texture, spikes, eyes", "axe, hammer, spoon"], "captions_pred_pc": ["a black and white drawing of a flower", "of a black lace belt on a white background"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 
3", "a 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe"], "question": "which object is white?", "label": 1}, {"captions": [" a red and blue metal-framed building structure with a steel house frame.", " a small house on an island with trees, shrubs, a pool, and a lake."], "sample_ids": ["a453ac83ef1e4f76bd7978451888d5f5", "c8331489fca44685bedfa1bdadf6ccb3"], "properties": ["color, material, frame", "house, lake, pool"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "a black and white image of a pattern on a piece of paper"], "captions_pred_image": ["a 3d model of a metal frame structure", "a 3d model of a large building"], "question": "which entity has a pool", "label": 1}, {"captions": ["a 3d white ring adorned with flowers and leaves.", "a 3d wooden toy castle with red and yellow towers, a red roof, and a man inside."], "sample_ids": ["8219c3b38ad547268e1828b9c3a487f1", "311f6655ed854899b07ea10f3613ef7a"], "properties": ["color, shape, material", "a, color, red"], "captions_pred_pc": ["a heart made of black dots on a white background a heart made of black dots on a white background illustration", "a black and white drawing of a wallet on a white background"], "captions_pred_image": ["a 3d printed ring in white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d 
printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white", "a 3d model of a castle with two towers"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole.", " a house with a wooden-framed roof structure."], "sample_ids": ["b16fb21cda9a4a21a024df749c2304f4", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["roof, ceiling, hole", "roof, material, wood"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a small house and a tree in the foreground", "a 3d model of a building with a roof"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a small village featuring houses, trees, and a winding road.", " of a wooden table and chair."], "sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "884d232f9cd54b6c8c41b6bf357c704f"], "properties": ["houses, trees, road", "table, chair, wood"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "a black and white pattern on a white background"], "captions_pred_image": ["a black and white photograph of a small town", "a 3d model of a small wooden table"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a castle on an island with a small floating house, trees, and 
clouds.", " of a wooden windmill with a red roof."], "sample_ids": ["c4c09479570943e2845fbd4c6a450568", "2ad8fca30285483d8b7f602fa078215d"], "properties": ["castle, island, house", "roof, color, red"], "captions_pred_pc": ["above a black and white illustration of a group of dots in the shape of a circle", "a white and black image of a snowflake in the shape of a snowflake"], "captions_pred_image": ["a 3d model of a small house on an island", "a 3d model of a windmill on a gray background"], "question": "which entity has a roof that is the color red", "label": 1}, {"captions": [" a house with a roof, wooden beams, and chimney.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["be1376023c274bdda995d54f3694157f", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["roof, beams, chimney", "roof, color, yellow"], "captions_pred_pc": ["a black and white drawing of a bathroom with a shower", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a house with a roof"], "question": "which roof is yellow", "label": 1}, {"captions": ["a black table lamp with a black shade and a hat on top.", " a small white barn with a metal roof."], "sample_ids": ["31c00c8337de4854a20299d719136cce", "4ca3342a96824684845f7d0e062ab176"], "properties": ["color, black, shade, black", "roof, metal, white"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a circular object on a white background royalty free illustration", "in 15 words or less a black and white illustration of a house made of dots"], "captions_pred_image": ["a 3d model of a table lamp royalty free 3d model preview no.2", "a 3d model of a barn"], "question": "which entity has a white roof", "label": 1}, {"captions": [" a green 
and black bat-winged sword with green flames.", " a black and white striped box."], "sample_ids": ["78c5d5bdf2da4b03a467ef9ab2a0c13f", "00fa8accaaad44c780efe0c04ed4a12b"], "properties": ["color, shape, material", "color, black, white"], "captions_pred_pc": ["of a person standing on a white background holding a long metal rod", "in 15 words or less a black and white pattern on a white background"], "captions_pred_image": ["a 3d model of a dragon's wing in the air", "a 3d image of a black and white striped surface"], "question": "which object is made of black and white?", "label": 1}, {"captions": ["s of a plane, house, pile of rocks, and islands, accompanied by watercolor paintings of a tree, rocks, and a boat in a sand dune.", " a large building with a roof and windows."], "sample_ids": ["8da8da6ccf5f4011a4115977c55d1cb8", "32d1fbd3ee91426882290305f70021e6"], "properties": ["s, plane, house", "roof, windows, building"], "captions_pred_pc": ["a black and white silhouette of a map", "of a black and white photo of a diamond buckle"], "captions_pred_image": ["a black and white image of a small island in the middle of a body of water", "a 3d model of an apartment building royalty free 3d model preview no.2"], "question": "which entity has a roof?", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "6b745457e06840119058883b35f78f58"], "properties": ["color, material, structure", "roof, color, blue"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d model of a house with a steeple on top"], "question": "which structure is made of wood", "label": 1}, {"captions": [" a white rock with green grass and 
moss on it.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["color, grass, moss", "torso, breasts, pattern"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a 3d model of a woman's chest"], "question": "which entity is a torso?", "label": 1}, {"captions": [" a wooden staircase with a door and square ceiling panel.", " a spiral staircase with a railing in a small building."], "sample_ids": ["3fe7e366bf924a00bf4b06ded35fd392", "28cae056856c4a8ba9d1a6af5355f831"], "properties": ["door, panel, staircase", "staircase, railing, building"], "captions_pred_pc": ["above a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white 
photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black", "a black and white photograph of a light switch"], "captions_pred_image": ["a 3d model of a staircase on a wooden floor", "a 3d model of a staircase in a white room"], "question": "which staircase is in a building?", "label": 1}, {"captions": ["a white 3d-printed ring featuring a fish design and the word \"defia,\" accompanied by a white pen and logo.", "a black and white of a knife/sword with a handle."], "sample_ids": ["9a15d8285e614fb4b7d1cb7076a7b56a", "c7692fa635e049bda0a2039fa5a784a4"], "properties": ["color, white, pen, logo", "image, color, black and white"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "of a black and white knife on a white background"], "captions_pred_image": ["a white toothbrush on a gray background", "a black and white image of a knife"], "question": "which entity is black and white?", "label": 1}, {"captions": [" a black castle with stairs and a door.", " a house with wooden framing and trusses."], "sample_ids": ["036d34dec6274f6a99d8f9689d19a77d", "4501794e257c4a8ba60a94757d8e93a9"], "properties": ["door, stairs, color", "frame, trusses, wood"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a medieval castle on a white background royalty free 3d model", "a 3d model of a house under construction"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a small house with a roof.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["9578e8de15ec44ce802072aaa4df3910", "c516e491e5ee4313a4c06365ef13af3f"], "properties": 
["roof, house, small", "house, fence, playground"], "captions_pred_pc": ["above a black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a room with a lot of wires"], "question": "which house has a fence", "label": 1}, {"captions": [" a red, blue, and green striped tower building.", "a featuring a skeleton, torn paper, long stick, rock, and broken wood."], "sample_ids": ["8668f9e9d1a64b86b31f260b8056cd19", "46903bf029934b1989bc062dcb0a5531"], "properties": ["color, red, blue, green", "skeleton, torn, paper, long stick, rock, broken wood"], "captions_pred_pc": ["a black and 
white drawing of a butterfly on a white background a black and white drawing of a butterfly on a white background royalty free illustration", "a close up of a black object on a white background"], "captions_pred_image": ["a 3d model of a pair of cylindrical towers with a staircase leading up to the top of one of the towers", "a 3d sculpture of a person's hand in the air royalty-free 3d model preview"], "question": "which entity is a skeleton?", "label": 1}, {"captions": ["a 3d wooden pole with a metal spear handle.", " a city with buildings, houses, trees, and grass."], "sample_ids": ["1d5cf234576e41f0ba209c5e19d2db47", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["- material is wood, metal, metal", "buildings, houses, grass"], "captions_pred_pc": ["of a black and white illustration of a vase on a pedestal", "in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of an old-fashioned pitchfork", "an image of a pile of trash on the ground"], "question": "which entity has more grass", "label": 1}, {"captions": [" a small white house.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["c7ad04db657c42d8a3de4d90aaed0cd5", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["size, color, shape", "house, fence, playground"], "captions_pred_pc": ["a black and white image of two pieces of plastic", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of a small white house", "a 3d model of a room with a lot of wires"], "question": "which house has a fence", "label": 1}, {"captions": [" of a stone wall with writing, metal bars, and a broken piece featuring a metal hook, resembling an egyptian cuneiform tablet.", " a building featuring graffiti, chinese writing, a door, a broken window, and an interior bathroom with a sink."], 
"sample_ids": ["9951e345202c44c19ab1eec11934bc52", "1ee3df6f94ea4c329a9c5245634e34d5"], "properties": ["image is a stone wall with writing, metal bars, and a broken piece featuring a metal hook, resembling an egyptian cuneiform tablet", "graffiti, chinese writing, door"], "captions_pred_pc": ["above a black and white drawing of a pair of binoculars", "a black and white illustration of a bridge with dots"], "captions_pred_image": ["a 3d rendering of a stone wall with a metal bracket holding it in place", "a black and white image of a bathroom with a sink and a toilet"], "question": "which entity has a door?", "label": 1}, {"captions": [" a white wooden climbing frame with swing set and ladders.", " of a small house featuring a flat, brown roof, table with two chairs and a stool, ceiling light, and a window."], "sample_ids": ["f2935306c64a479685462220e33e6f3c", "3a509431d96b43f8a7aebe2846f08b96"], "properties": ["- material is wood - color is white - height is 1.8 m", "roof, brown, flat"], "captions_pred_pc": ["an illustration of a barbed wire fence", "a black and white drawing of a snowflake on a white background a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a playground ladder set royalty free 3d model preview no.2", "a 3d rendering of a table and stool"], "question": "which entity has a brown roof", "label": 1}, {"captions": ["a white of a helmet and airplane wing.", "a featuring a fireplace with a broom, shovel, and other items, a man and a dog, a table with a bucket, and a vase with a pipe."], "sample_ids": ["17f3bb7773ef4b2ea76134896b105fbf", "fdcbb46224a44faca5c3fb20264eb2e7"], "properties": ["color, helmet, airplane wing", "broom, shovel, fireplace"], "captions_pred_pc": ["a black and white image of a person's head with dots all over it", "a close-up view of a white plastic bag with a small hole in it"], "captions_pred_image": ["a 3d model of a white helmet on a gray 
background", "a 3d model of an outdoor fireplace"], "question": "which entity has a fireplace?", "label": 1}, {"captions": [" a house with a roof and white brick wall.", " of a stone wall with a window and a clock."], "sample_ids": ["00915b83a52b45d498962d0cd42af491", "46fc13a04c8b47fa86215b9efc2eb1f9"], "properties": ["roof, wall, color", "window, clock, wall"], "captions_pred_pc": ["a black and white image of a rectangle with dots all over it", "a black and white illustration of a bullet in the middle of a starry sky"], "captions_pred_image": ["a 3d model of a small white house with a roof", "a 3d model of a brick wall"], "question": "which wall is made of stone", "label": 1}, {"captions": [" a house featuring a pink-purple roof with trusses and wooden ceiling beams.", "a white of a house with a hole in the ceiling."], "sample_ids": ["b6b6a3f82bdd47c3afaf9af885ba8703", "2915cbd03e164ac0bb13866c2d68cc65"], "properties": ["roof, trusses, beams", "image, house, ceiling"], "captions_pred_pc": ["a black and white pattern of dots in the shape of a square royalty free illustration", "above a black and white drawing of a house"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of a house with a balcony"], "question": "which house has a hole in the ceiling", "label": 1}, {"captions": ["a 3d wooden toy castle with red and yellow towers, a red roof, and a man inside.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["311f6655ed854899b07ea10f3613ef7a", "4a889132cc444d10bfcbf6c760984416"], "properties": ["a, color, red", "a, color, white"], "captions_pred_pc": ["a black and white drawing of a wallet on a white background", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d model of a castle with two towers", 
"a 3d model of a desk and chair"], "question": "which object is white", "label": 1}, {"captions": [" of a white plastic tube with a hole and a chip on it.", "a victor calculator with a black plastic cover and wall-mounted design."], "sample_ids": ["9968e06a62e8487ea33460e640abc573", "88ffa01f4fc34a8cb3e2a659e9e26125"], "properties": ["color is white, material is plastic, shape is tube", "cover, black, plastic"], "captions_pred_pc": ["a black and white image of a broom on a stand", "of a black and white image of a skateboard"], "captions_pred_image": ["a white object on a gray background", "a victor calculator on a white background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a house with a pink roof, brick walls, and insulated ceiling.", " of a white and brown sea shell with a hole and small pearls on it, resembling a sea urchin."], "sample_ids": ["c8936ace72954650b4e2d84246964849", "411c164757fc4de68dfecb35fa858223"], "properties": ["roof, color, pink", "resembles, sea urchin, shell"], "captions_pred_pc": ["a black and white drawing of a toilet", "in 15 words or less a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a 
dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a sea urchin"], "question": "which entity is a shell?", "label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "3cd410c4359a4cef98702963a2b9802b"], "properties": ["color, light, jewels", "roof, trusses, ladder"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background royalty free illustration", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a 3d model of the roof of a building"], "question": "which entity has a roof with wooden trusses and a ladder?", "label": 1}, {"captions": [" a building with wooden and steel structures, featuring stairs, railings, and a ceiling with numerous pipes.", " a house with a blue roof."], "sample_ids": ["26c47880756b4876b4f263373c3c5303", "8ff693cd3ca74f8a901ca259b8b3a7ac"], "properties": ["building, material, steel", "roof, color, blue"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white drawing of a cross on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a large structure with multiple levels", "a 3d model of a house with a roof"], "question": "which building has a blue roof", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a wooden billiard table with legs."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "b20ad62516fa467ba6e8de063998e8e4"], "properties": ["roof truss, 
insulation, suspended ceiling", "legs, material, wood"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a black and white drawing of a rectangular shaped object"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a black and white image of a pool table"], "question": "which object has legs", "label": 1}, {"captions": ["a 3d object featuring a flat roof with blue and white pattern, a purple and white bench, a blue and purple striped runner, a bed with blue and purple stripes, blue and white striped paper, a bench with a blue and purple pattern, and a ceiling light fixture with wooden slats.", " a house with a wooden-framed roof structure."], "sample_ids": ["28d0cf71ed684dcb87b2c9b2744c3633", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["runner, bed, bench", "roof, material, wood"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a creative black and white drawing of a snowflake on a white background royalty free illustration", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a long, curved structure", "a 3d model of a building with a roof"], "question": "which roof is made of wood", "label": 1}, {"captions": [" of a red and white pokeball with the word \"pichu\" on it, compatible with various modeling and animation software.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["7d76c44cb9dd4948b8766d83994ca5f3", "06a1c233fb444830b577aa06e2c01294"], "properties": ["- material is stl, obj, fbx- size is 240px- color is red, white", "house, tree, hill"], "captions_pred_pc": ["a black and white circular pattern on a white background", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a close-up view of a black ball with a screw in the center", "a black and white image of a house in the middle of a field"], "question": "which entity has more 
trees", "label": 1}, {"captions": [" a small orange piano with legs.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["d793de7c08d74414beeb8ea50f730705", "a17477b445b3443189dad22f768b888b"], "properties": ["color is orange, size is small, legs", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white illustration of a bench", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a grand piano royalty free 3d model preview no 2", "a 3d model of a small building with a balcony"], "question": "which object has a roof", "label": 1}, {"captions": [" a small house with a blue roof, a door, and a pool.", " a house with a flat roof structure."], "sample_ids": ["40c52c2d278345c5b4e8d00a991271dc", "abc52d210d71415296730bb00352ce6f"], "properties": ["door, roof, pool", "roof, flat, structure"], "captions_pred_pc": ["of a black and white photo of a window with fringes", "a black and white drawing of a window with dots around it"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a house with a roof"], "question": "which house has a roof that is not flat?", "label": 1}, {"captions": [" a modern white house with furniture and a black accent.", " a small, snow-covered house."], "sample_ids": ["e5025a1ca0034b1aa97a0d42edeeae0f", "0d00d10b90134dbe9ce7b2b3d6669237"], "properties": ["color, white, furniture, black", "house, snow, cover"], "captions_pred_pc": ["above a black and white drawing of a floor plan", "in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white building with black and white tiles", "a piece of broken glass on a white background"], "question": "which house is covered in snow", "label": 1}, {"captions": ["a featuring a sailboat with a hook, a bird suspended in the air, and a pair of scissors.", " a small yellow table with a staircase and a square ceiling light fixture."], 
"sample_ids": ["f57ae66555d34349aeadc38b33f8f267", "36f4d2cbd02345c6a77f7345ebde841c"], "properties": ["a, bird, hook", "table, staircase, light"], "captions_pred_pc": ["of a 3d scan of a person's torso and limbs", "a black and white photo of the letter g"], "captions_pred_image": ["a black and white photo of a kite flying in the sky", "a 3d model of a table with stairs"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a white building with a square ceiling panel and a white 3d printed plane on top.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["eb3ea0e6963f4efda2a8cf0732befd56", "97e000ff41094665afd94ea565da8b13"], "properties": ["- material is 3d printed, ceiling panel is square, plane is white", "roof, material, wood"], "captions_pred_pc": ["above a black and white drawing of a cross", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a building on a white surface", "a 3d model of the roof of a building"], "question": "which roof is made of wood", "label": 1}, {"captions": ["s of a skateboard, snowboard, door, and three pairs of shoes, along with a paper mask and a paper with a hole.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["a267d906e4cf4d36bfe841c6cc9e698b", "97e000ff41094665afd94ea565da8b13"], "properties": ["s of, snowboard, shoes, mask", "roof, material, wood"], "captions_pred_pc": ["above a black and white image of a person standing on a piece of paper", "a black and white drawing of a floor plan"], "captions_pred_image": ["a black and white image of a surfboard flying through the air", "a 3d model of the roof of a building"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" a small house with stairs and a balcony.", " a large building with a roof and windows."], "sample_ids": ["0fbc5f16d301450c820b1f2158fd4f69", "32d1fbd3ee91426882290305f70021e6"], "properties": ["balcony, 
stairs, house", "roof, windows, building"], "captions_pred_pc": ["a black and white image of a square with dots on it", "of a black and white photo of a diamond buckle"], "captions_pred_image": ["a 3d model of a building with two floors and a balcony", "a 3d model of an apartment building royalty free 3d model preview no.2"], "question": "which building has a roof?", "label": 1}, {"captions": [" a house with wooden framing and trusses.", " a wooden billiard table with legs."], "sample_ids": ["4501794e257c4a8ba60a94757d8e93a9", "b20ad62516fa467ba6e8de063998e8e4"], "properties": ["frame, trusses, wood", "legs, material, wood"], "captions_pred_pc": ["a black and white drawing of a window", "a black and white drawing of a rectangular shaped object"], "captions_pred_image": ["a 3d model of a house under construction", "a black and white image of a pool table"], "question": "which object is made of wood", "label": 1}, {"captions": ["a white hat, plastic cup with a lid, and a bowl.", "a white glass beer mug."], "sample_ids": ["4a9d79b48eda4ad797a652ee01b1b026", "1d686cbd3e9a4c629a43088658989286"], "properties": ["hat, cup, bowl", "color, white, glass"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a funnel", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a white plastic hat and bowl on a gray background", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["resembles, toy, bookshelf", "roof, color, yellow"], "captions_pred_pc": ["a black and white image of a book cover", "a black and white drawing of 
a room"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "a 3d model of a table and chairs on a white background"], "question": "which entity has a yellow roof", "label": 1}, {"captions": [" a brick wall with grass.", " a stone wall featuring carvings and statues."], "sample_ids": ["53f2d948091f417cb580e22469c94db2", "42f663140f834d1ab5f95cd8a5ad04b3"], "properties": ["brick, grass, wall", "carving, statue, wall"], "captions_pred_pc": ["above a black and white illustration of an underwater scene", "a black and white image of a snowflake on a white background"], "captions_pred_image": ["a black and white photo of a brick wall and a puddle", "a 3d image of a group of statues on a wall"], "question": "which wall is made of stone", "label": 1}, {"captions": [" a green skull and sphere.", " a clay pot with holes in it."], "sample_ids": ["4f4dc1300ab24b0a910da77a4d5e783f", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["color, skull, sphere", "hole, material, clay"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background dandelion illustration on a white background illustration", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a skull on a gray background", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": [" a white rock with green grass and moss on it.", " a city with buildings, houses, trees, and grass."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["color, grass, moss", "buildings, houses, grass"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "in 15 words or less a black and white drawing of a 
snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "an image of a pile of trash on the ground"], "question": "which entity has more grass", "label": 1}, {"captions": ["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", "a featuring a fireplace with a broom, shovel, and other items, a man and a dog, a table with a bucket, and a vase with a pipe."], "sample_ids": ["db19e46828c94b1a8b9a6ca9f673c604", "fdcbb46224a44faca5c3fb20264eb2e7"], "properties": ["a, roof, soup", "broom, shovel, fireplace"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "a close-up view of a white plastic bag with a small hole in it"], "captions_pred_image": ["a 3d model of a bowl and chopsticks on a sheet of paper", "a 3d model of an outdoor fireplace"], "question": "which entity has a fireplace?", "label": 1}, {"captions": [" of a japanese-style pagoda house in a pixelated village.", " a small house on a hill in a field."], "sample_ids": ["42c4e6ca4a0c4b7b9a97d543b2442222", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["image is a japanese-style pagoda house in a pixelated village", "house, hill, field"], "captions_pred_pc": ["a black and white drawing of a square on a white background stock illustration \u00a9 2019 iStock", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d model of a japanese temple and pagoda", "a black and white image of a small house"], "question": "which house is on a hill?", "label": 1}, {"captions": [" a small white building with a door, resembling a box-shaped house.", " a small house with a blue roof, a door, and a pool."], "sample_ids": ["1b5fe88d0ff149ae9d8b4eb455c5c90c", "40c52c2d278345c5b4e8d00a991271dc"], "properties": ["shape is box, color is white, door is present", "door, roof, pool"], "captions_pred_pc": ["a 
black and white image of a person standing in front of a white background", "of a black and white photo of a window with fringes"], "captions_pred_image": ["a 3d model of a white, open shelving unit", "a 3d model of a small house"], "question": "which house has a pool", "label": 1}, {"captions": [" a large house/building structure with a roof.", " a house with a blue roof."], "sample_ids": ["82859e4c6d4e4bbea94b6252bef1d398", "8ff693cd3ca74f8a901ca259b8b3a7ac"], "properties": ["roof, structure, house", "roof, color, blue"], "captions_pred_pc": ["a black and white photograph of a metal sculpture", "a black and white drawing of a cross on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a large white structure", "a 3d model of a house with a roof"], "question": "which structure has a roof", "label": 1}, {"captions": [" a building with blue lines and structure.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["3bf337b699664ca0adf0817962d58718", "b896a0898efe4059a776193c02132129"], "properties": ["color, shape, structure", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white drawing of a floor plan", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a gray object featuring an axe, guitar headstock, and head.", "a black motorcycle helmet with a face mask and visor."], "sample_ids": ["ac5c86f38c8e4570a7eefff0958185cf", "ad6df43a2ce24edfb15f5bb64755ed0d"], "properties": ["Headstock, Guitar, Head", "color, black, visor"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white circular shape made up of many small dots on a white background a black and white circular shape made up of many small 
dots on a white background illustration"], "captions_pred_image": ["a 3d model of an axe head", "a black motorcycle helmet with a visor on top"], "question": "which object has a visor", "label": 1}, {"captions": ["a white of a small, multifunctional vehicle resembling a container, spaceship, toy car, and machine gun.", " a colorful, wire-framed building structure resembling a cube."], "sample_ids": ["c0cc779380c7408f9061905c4ee19726", "62b7c7c684044d998fee9ff35beeb79b"], "properties": ["color, shape, size", "color, frame, shape"], "captions_pred_pc": ["a black and white illustration of a toothbrush and toothpaste", "a black and white illustration of a building made up of dots"], "captions_pred_image": ["a 3d model of a small white object on a gray background", "a 3d model of a building structure"], "question": "which entity is a cube?", "label": 1}, {"captions": [" of a small wooden house with two roofs.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["30fc23ae4edb42609e30e029dede54bd", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["house, roof, wooden", "- material is wood, rusty, horned"], "captions_pred_pc": ["of a pair of stainless steel screws on a white background", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small barn", "3d model of a plague doctor's mask"], "question": "which object is made of wood", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " of a large axe with a cross on top."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "4b6734945e204158b076a429a30ce2e9"], "properties": ["color, shape, and size", "axe, cross, top"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "for an axe on a white background"], "captions_pred_image": ["a black and white image of a group of small objects floating in the 
air", "a black and white image of an axe on a gray background"], "question": "which object has a cross on top", "label": 1}, {"captions": ["a featuring white and red cubes, and a pink and white chair.", " a purple chair with holes in it."], "sample_ids": ["f2c44a82ba744ba8b93e9a1c2272c117", "833151c8e0f4489a9fa966635a948452"], "properties": ["color, white, red, pink", "color, purple, holes"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "of a silver pendant with an intricate design"], "captions_pred_image": ["a 3d model of a white structure with stairs", "a 3d model of a white chair"], "question": "which chair has holes in it", "label": 1}, {"captions": [" a city with buildings, houses, trees, and grass.", " a small house with stairs and a roof."], "sample_ids": ["bc649e19956041cf89c1572f1a33cff1", "e9305c80010f4e3b9de9789f01a9bee5"], "properties": ["buildings, houses, grass", "roof, stairs, house"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration", "above a black and white image of a square with dots all over it"], "captions_pred_image": ["an image of a pile of trash on the ground", "a 3d rendering of a podium on a wooden floor"], "question": "which entity has a roof", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", "s of a boat, bird, paper airplane, and kite flying in the air."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "795cfa41d48a4cfc893ff1981318594d"], "properties": ["room, bed, desk", "s, boat, bird, airplane, kite"], "captions_pred_pc": ["a black and white drawing of a door", "above a 3d illustration of a boy standing with his arms outstretched"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a white kite flying in the air against a gray background"], "question": "which entity has a boat", "label": 1}, {"captions": [" featuring a sandbox, sand 
bucket, wooden blocks, water container, and a lamp made out of blocks.", " a small house on a hill in a field."], "sample_ids": ["674a36147ffb47059e48abc9fa19d923", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["sandbox, sand bucket, wooden blocks", "house, hill, field"], "captions_pred_pc": ["for a black and white photo of a basketball hoop", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d model of a brick, a box, and a pile of dirt royalty free 3d model preview no. 3", "a black and white image of a small house"], "question": "which object is in a field?", "label": 1}, {"captions": [" a house with a yard, trees, bushes, and surrounding buildings.", " a wooden table and bench with a deer head and branch on it."], "sample_ids": ["7f8942ef51dd4246993a587a12df168c", "857d5391612349f4ae6cd854a1ec96de"], "properties": ["house, yard, surrounding buildings", "table, bench, deer"], "captions_pred_pc": ["a black and white image of a truck on a white background", "a black and white drawing of a table and chairs"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a black and white image of a bench and table with a deer's head on the table"], "question": "which entity is a table?", "label": 1}, {"captions": [" a car dealership interior featuring a showroom, repair shop, and various elements like a booth, bed, and ceiling light.", "star wars stormtrooper "], "sample_ids": ["3e22efacf9ee40a1a6b2e4b72a7314d2", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["a, booth, bed", "a, color, white"], "captions_pred_pc": ["a black and white drawing of a tv screen with dots all over it royalty free illustration", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a 3d rendering of a room with a black and white color scheme", "a 3d model of a star wars stormtrooper"], "question": "which entity is white", "label": 1}, {"captions": [" of a small wooden house with two roofs.", " a small white 
house with a staircase and a window."], "sample_ids": ["30fc23ae4edb42609e30e029dede54bd", "9eb88d17310d42dda9e17883e9922525"], "properties": ["house, roof, wooden", "house, staircase, window"], "captions_pred_pc": ["of a pair of stainless steel screws on a white background", "a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small barn", "a 3d rendering of a small room with a staircase"], "question": "which house has a staircase?", "label": 1}, {"captions": ["a featuring a large flying ship, a mountain range with a central lake, and a small island resembling hawaii.", " of a wrecked plane, ship, and bird on a pile of rocks with grass."], "sample_ids": ["4d613d2057454e719bcae7f8cf05210a", "b0c703df20154bbf9fd8707c61137fc5"], "properties": ["a, island, resembles, hawaii", "plane, ship, bird"], "captions_pred_pc": ["above a black and white image of a person's hand holding a pencil", "a black and white watercolor map of the state of ohio"], "captions_pred_image": ["a 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a 
landscape in black and white 3d model of a landscape in black and white 3d model of a", "a black and white photograph of a pile of debris on the ground"], "question": "which entity has a plane", "label": 1}, {"captions": [" a small house with stairs and a roof.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["e9305c80010f4e3b9de9789f01a9bee5", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["roof, stairs, house", "roof, trusses, beams"], "captions_pred_pc": ["above a black and white image of a square with dots all over it", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a 3d rendering of a podium on a wooden floor", "a 3d model of the roof of a building"], "question": "which house has a roof with trusses and wooden ceiling beams", "label": 1}, {"captions": [" a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog.", " a white castle composed of small cubes."], "sample_ids": ["f6c6e7a65a3e42dfa431b1d984c72f28", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["house, fence, dog", "composed of, white, cubes"], "captions_pred_pc": ["above a black and white drawing of a bathroom", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of 
a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of small cubes", "label": 1}, {"captions": [" the white text \"ekberkaslan\" with a row of white cubes and a numbered box.", " a stone wall featuring carvings and statues."], "sample_ids": ["87ee30d475f34b799c24bf7ef3a7b540", "42f663140f834d1ab5f95cd8a5ad04b3"], "properties": ["- color is white- shape is cubes- number is 1", "carving, statue, wall"], "captions_pred_pc": ["a close up of a black and white striped scarf", "a black and white image of a snowflake on a white background"], "captions_pred_image": ["a 3d image of the word ebercaskalan on a white background", "a 3d image of a group of statues on a wall"], "question": "which entity has more statues", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["- color is red, blue, pink", "island, terrain, water"], "captions_pred_pc": ["for a black and white image of an object on a white background", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": ["a 3d object featuring elements of a green and purple lamp, a cartoon character with a light bulb, an ice cream cone, a syringe, a light pole, a pen with a face, and a doctor's stethoscope.", " a small house with a pond and situated on a rock."], "sample_ids": ["42d3657ce7bc4b5dbeb7f444e089a715", 
"92859eb82a344134806b37cc209927c6"], "properties": ["a lamp, a syringe, a light pole", "house, rock, pond"], "captions_pred_pc": ["a spoon with black dots on a white background spoon with black dots on a white background, 3d illustration royalty free stock photo", "in 15 words or less a black and white drawing of a toaster"], "captions_pred_image": ["a 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop", "a 3d model of a house in the middle of a field"], "question": "which entity has a pond?", "label": 1}, {"captions": [" a stone arrowhead with blue crystals and ice-like features.", " a wooden staircase with a red and white railing and a red arrow."], "sample_ids": ["5f8c7eda0f464019a4acea243114555d", "bb8bb4c9972d4b718b8bbd3ed5fdd14d"], "properties": ["- material is stone - color is blue - shape is arrowhead", "arrow, red, white"], "captions_pred_pc": ["above a black and white drawing of an arrow", "above a black and white image of a square with a white cross in the center"], "captions_pred_image": ["a 3d model of a large piece of ice on a white background royalty free 3d model preview no 3", "a 3d model of a spiral staircase"], "question": "which arrow is red", "label": 1}, {"captions": ["a featuring a futuristic chair, a black cat with a 
sword, a person holding an umbrella, and a black and blue dragon, airplane, and helicopter.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["4df70180f2ea400782d2e2de76063894", "c3a82df41875402285608ef13a55df57"], "properties": ["color, black, blue", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a 3d illustration of a girl in a dress 3d illustration of a girl in a dress, isolated on a white background royalty free stock illustration", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a black and white 3d model of a person holding a sword in the shape of a paper airplane", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", " a city with buildings, houses, trees, and grass."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["texture, spikes, eyes", "buildings, houses, grass"], "captions_pred_pc": ["a black and white drawing of a flower", "in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 
3", "an image of a pile of trash on the ground"], "question": "which entity has grass", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "bded33af34104b9686b845dfd18309a9"], "properties": ["roof, green, lawn", "table, staircase, light"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of a small table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": ["a 3d white object resembling a sphere, frog, egg, hat, and shell.", " a four-legged metal workbench with shelves."], "sample_ids": ["0a8e0b95d8ce43ee9159ad01d925aad8", "e93b633d477942d9b79ef8ab566473d6"], "properties": ["shape is sphere, color is white, material is plastic", "Four legs, Metal, Shelf"], "captions_pred_pc": ["a black and white illustration of a sponge in the shape of a sponge sponge black and white illustration of a sponge in the shape of a sponge royalty free illustration", "for a black and white illustration of a cross"], "captions_pred_image": ["a 3d sculpture of an apple on a white background", "a 3d model of a table with four legs"], "question": "which object is made of metal", "label": 1}, {"captions": ["a white 3d-printed ring featuring a fish design and the word \"defia,\" accompanied by a white pen and logo.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["9a15d8285e614fb4b7d1cb7076a7b56a", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["color, white, pen, logo", "island, mountain, grass"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "a black and white map of the island of malta"], 
"captions_pred_image": ["a white toothbrush on a gray background", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": ["3d white geometric shapes and paper clip.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["8760487af6c748ac8111f09113a77b16", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["3D, white, paper clip", "- material is wood, rusty, horned"], "captions_pred_pc": ["of a black and white knife on a white background", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d printed diamond shaped object on a white background", "3d model of a plague doctor's mask"], "question": "which object is made of wood", "label": 1}, {"captions": ["s of a rock, boat, plane, and leaf on a stick.", "s of a rock, boat, plane, and leaf on a stick."], "sample_ids": ["be0884a7ced34b3d92687b6087798a1e", "be0884a7ced34b3d92687b6087798a1e"], "properties": ["s, stick, leaf", "s, stick, leaf"], "captions_pred_pc": ["above a black and white drawing of an object floating in the sky", "above a black and white drawing of an object floating in the sky"], "captions_pred_image": ["a black and white photograph of a rock on a sandy surface", "a black and white photograph of a rock on a sandy surface"], "question": "which entity has a rock", "label": 0}, {"captions": [" a house with a pink roof, brick walls, and insulated ceiling.", " a metal building with a purple roof and cage structure."], "sample_ids": ["c8936ace72954650b4e2d84246964849", "cbc10fb816034537b052e7c8fb75c4a6"], "properties": ["roof, color, pink", "roof, purple, structure"], "captions_pred_pc": ["a black and white drawing of a toilet", "for a black and white image of a bench"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a building with a metal roof"], "question": 
"which roof is made of metal", "label": 1}, {"captions": [" of a white and wooden chest of drawers cabinet.", " a small house with stairs and a roof."], "sample_ids": ["d5722274fb094222aca90bb59f4dff09", "e9305c80010f4e3b9de9789f01a9bee5"], "properties": ["chest of drawers, cabinet, white", "roof, stairs, house"], "captions_pred_pc": ["a cross on a white background vector illustration of a cross on a white background royalty free stock illustrations", "above a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d rendering of a white cabinet with a drawer", "a 3d rendering of a podium on a wooden floor"], "question": "which object has a roof", "label": 1}, {"captions": [" of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["f178fb523ad7421aaa90a92ee736ee00", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["bedroom, bathroom, bed", "house, pool, balcony"], "captions_pred_pc": ["a black and white drawing of a room with dots", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a small room with a bed, desk, and chair", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": [" a purple and green sphere.", " of two rocks with ice elements."], "sample_ids": ["906b2d1219804f4f9e57d4f6cfd47a83", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["color, shape, size", "image is a rock with ice elements"], "captions_pred_pc": ["in 15 words or less a black and white image of a sphere in the shape of a light bulb royalty free illustration", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a ball with a face on it", "a 3d image of two rocks on a gray surface"], "question": "which entity is a rock?", "label": 1}, {"captions": [" a glass 
bottle with liquid, ice, and a lid, resembling a salt shaker and a human figure.", "a low poly of a plant on a white object, resembling a paper or plastic bag."], "sample_ids": ["bb01282cb8b64470866260455b0b46fa", "d49d8ed244094349a99e4faca05e0690"], "properties": ["liquid, ice, lid", "low poly, plant, white"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a dandelion on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration", "a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a plastic bottle on a white background", "a 3d model of a plant growing out of a rock"], "question": "which entity is a low poly of a plant on a white object?", "label": 1}, {"captions": [" a white and black horned demon with bunny features.", " a gray object featuring an axe, guitar headstock, and head."], "sample_ids": 
["1d3537d1341a423f89990e9b06924904", "ac5c86f38c8e4570a7eefff0958185cf"], "properties": ["color, horns, features", "Headstock, Guitar, Head"], "captions_pred_pc": ["above a black and white image of a pair of headphones", "a black and white image of a toothbrush on a white background"], "captions_pred_image": ["a 3d model of a skull with horns on its head", "a 3d model of an axe head"], "question": "which entity has a headstock", "label": 1}, {"captions": [" of a white supreme logo t-shirt, low poly design.", " a large house with a roof on a platform."], "sample_ids": ["bea8441c08d94366b96b53775391d8e6", "cb3e09a301b746918a682a595037c7f7"], "properties": ["color, white, logo", "roof, platform, house"], "captions_pred_pc": ["for a black and white image of a shirt with dots", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a white t-shirt with a supreme logo", "a 3d model of a small house"], "question": "which entity is a building?", "label": 1}, {"captions": [" of a stone wall with writing, metal bars, and a broken piece featuring a metal hook, resembling an egyptian cuneiform tablet.", " of a wrecked plane, ship, and bird on a pile of rocks with grass."], "sample_ids": ["9951e345202c44c19ab1eec11934bc52", "b0c703df20154bbf9fd8707c61137fc5"], "properties": ["image is a stone wall with writing, metal bars, and a broken piece featuring a metal hook, resembling an egyptian cuneiform tablet", "plane, ship, bird"], "captions_pred_pc": ["above a black and white drawing of a pair of binoculars", "a black and white watercolor map of the state of ohio"], "captions_pred_image": ["a 3d rendering of a stone wall with a metal bracket holding it in place", "a black and white photograph of a pile of debris on the ground"], "question": "which entity is a wrecked plane", "label": 1}, {"captions": [" a small white box with a green light.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": 
["a5e37b9c782c4340b4dea45fbe1c701a", "97e000ff41094665afd94ea565da8b13"], "properties": ["color, white, light", "roof, material, wood"], "captions_pred_pc": ["a 3d sculpture of a vase made of small black dots on a white background 3d sculpture of a vase made of small black dots on a white background royalty free illustration", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d rendering of a small white box", "a 3d model of the roof of a building"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a house featuring a purple roof and a suspended ceiling with a light fixture.", " a house with a roof, featuring roof truss, insulation, and a suspended ceiling."], "sample_ids": ["579b43057ef74bd08d06bdd3e8d973a0", "c1462fda08db4a769f68adae5c88cd43"], "properties": ["roof, purple, suspended", "roof truss, insulation, suspended ceiling"], "captions_pred_pc": ["a black and white image of a piece of paper with a pattern of dots on it", "a black and white drawing of an arrow pointing to the right"], "captions_pred_image": ["a 3d model of the roof of a building royalty free 3d model no.", "a 3d model of a white bench with a grid pattern"], "question": "which house has a roof truss", "label": 1}, {"captions": ["yellow and white 3d corn on the cob model resembling a toothbrush.", "a white of a man with arms outstretched."], "sample_ids": ["1fd0e7ffe26349da89815a6d4d6a189a", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["color, shape, material", "image, color, white"], "captions_pred_pc": ["a black and white illustration of a circle made up of tiny dots", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of 
a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a", "a 3d model of a man with his arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": ["a red circle, red and white arrow, mouse, and child's handwriting.", " of a small island featuring a white lighthouse, a fountain, and a grassy crater."], "sample_ids": ["3c233f87bf264968a7f0660b9eac9e4a", "2a30e69498ff4fd1a33c1fb72286f553"], "properties": ["red, mouse, handwriting", "lighthouse, fountain, crater"], "captions_pred_pc": ["in 15 words or less a black and white image of a person's hand holding a pencil and drawing on a piece of paper", "a black beanie with sparkles on a white background"], "captions_pred_image": ["a black and white drawing of a person's hand holding a pencil", "a black and white image of an object on top of a pedestal"], "question": "which entity has a fountain?", "label": 1}, {"captions": ["a 3d printed green robot with two arms and two legs.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["04e3e8ce541e487b9e342570fe1b4eb2", "c3a82df41875402285608ef13a55df57"], "properties": ["color is green, material is 3d printed, number of limbs is 2", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of a robot in the shape of a snowflake", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a futuristic robot standing on its hind legs", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a featuring a boat, table, chairs, umbrella, and 
solar panel.", " a small house featuring a wooden floor, stairs, a bathroom with a sink and toilet, and a white box on a table."], "sample_ids": ["0f0eb3a198d341d28f809b6d7634be8a", "73f2780847f14547b9ae5f9e8a81e348"], "properties": ["boat, table, chairs, umbrella, solar panel", "floor, stairs, bathroom"], "captions_pred_pc": ["a black and white illustration of a boat with an umbrella", "a black and white image of a leopard print pattern"], "captions_pred_image": ["a 3d model of a boat, a table, chairs, and an umbrella", "a 3d model of a small table"], "question": "which entity has a bathroom?", "label": 1}, {"captions": ["yellow and white 3d corn on the cob model resembling a toothbrush.", " a staircase with a railing, table, and chair, featuring a square ceiling light."], "sample_ids": ["1fd0e7ffe26349da89815a6d4d6a189a", "51a0fba79bce472a8b827b78a110b77f"], "properties": ["color, shape, material", "stair, table, chair"], "captions_pred_pc": ["a black and white illustration of a circle made up of tiny dots", "for a black and white image of a toilet paper holder"], "captions_pred_image": ["a 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a", "a 3d model of a staircase in a room"], "question": "which entity has more stairs", "label": 1}, {"captions": ["a white glass beer mug.", " a rusted metal barrel with a yellow and red warning sign and stripe on 
it."], "sample_ids": ["1d686cbd3e9a4c629a43088658989286", "5a49ad82ef7a4d33badea2261720f518"], "properties": ["color, white, glass", "rusty, warning, metal"], "captions_pred_pc": ["a black and white drawing of a beer mug on a white background", "a black and white drawing of dots on a white background a black and white drawing of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a glass pitcher", "a black and white photograph of a barrel"], "question": "which object is made of metal", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "b16fb21cda9a4a21a024df749c2304f4"], "properties": ["apse, roof, floor plan", "roof, ceiling, hole"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white image of a square with dots on it"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a small house and a tree in the foreground"], "question": "which entity has a roof with a hole?", "label": 1}, {"captions": [" a small island with trees, water, and a river.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], "sample_ids": ["5a9c593092c04deaa0f17a1c28a79476", "d81d13362ae04371bb2cba46e4939665"], "properties": ["water, river, island", "hat, bow, arrow"], "captions_pred_pc": ["in 15 words or less a black and white polka dot pattern on a white background", "above a black and white photo of an ice sculpture"], "captions_pred_image": ["a 3d model of a snowy landscape with trees in the foreground royalty free 3d model preview no.2", "a sculpture of an african man sitting on a pedestal"], "question": "which entity has a hat?", "label": 1}, {"captions": [" a tall glass tower featuring blue, green, and white squares.", " of a 
small wooden house with a blue roof and clock tower."], "sample_ids": ["405d68eda26c401fbcddc7e3a457c74e", "6b745457e06840119058883b35f78f58"], "properties": ["color, shape, height", "roof, color, blue"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "a black and white image of a building with dots"], "captions_pred_image": ["a black and white photograph of a metal sculpture on a pedestal", "a 3d model of a house with a steeple on top"], "question": "which building has a blue roof", "label": 1}, {"captions": ["a 3d white object resembling a knife, sword, and airplane.", " a small white airplane flying in the air."], "sample_ids": ["d88df1cb10da467bb6f77af6aeaa8f86", "747fb98f64794cdd96486debaf20a02c"], "properties": ["shape is cylinder, color is white, material is plastic", "airplane, color, white"], "captions_pred_pc": ["of a spike on a white background", "of a black and white image of a clock"], "captions_pred_image": ["a 3d model of a paper airplane", "a 3d model of a small airplane"], "question": "which object is white", "label": 1}, {"captions": ["pink and green pendant light hanging from a ceiling.", " of a white human skull with broken bone elements."], "sample_ids": ["1651a898288149edb8cbff0e1b2d692b", "6ca0f91b85464d7a845b3977351dd0b5"], "properties": ["color, pink, green", "color, white, skull"], "captions_pred_pc": ["above a black and white photo of a small circular object on a white background", "a black and white image of a cat's x-ray"], "captions_pred_image": ["a white pendant light hanging from the ceiling", "a 3d model of a human skull in white"], "question": "which entity is white", "label": 1}, {"captions": [" of a sword with a wooden handle.", " of a toy mushroom character with a white and brown head."], "sample_ids": ["bf448dbb4b6a43d89b2514929e8f7c43", "ae8a73809d4647c09cc82f403e47de1d"], "properties": ["handle, material, wood", "color, head, white and brown"], "captions_pred_pc": ["a black and white image of a sword 
on a white background", "a black and white illustration of a butterfly sitting on a dandelion stock illustration"], "captions_pred_image": ["a black and white image of a sword with two blades", "a 3d model of a gray and white cartoon character"], "question": "which entity has a white and brown head", "label": 1}, {"captions": [" a black square featuring two circles, one as a ring and another as a hole.", " a clay pot with holes in it."], "sample_ids": ["9b3ee92252ee493fb6cb8aeb39d88345", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["color, shape, size", "hole, material, clay"], "captions_pred_pc": ["in 15 words or less a black and white image of a shark's dorsal fin", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a black square on a white background royalty-free 3d model preview no. 
3", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": [" of a blue ford escort car.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["fa3da8623db242c082e2915b12873186", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["color, blue, ford escort", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["of a black leather bracelet on a white background", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a classic car royalty free 3d model preview no 2", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": ["a gray background featuring a white line in the middle.", " a black and white cube-shaped building with a staircase."], "sample_ids": ["47f89f92bef14b7193d0ffa3934f6977", "587e65f2d904440488a98dfa9a4e9dbe"], "properties": ["color, line, gray", "shape is cube, color is black, white"], "captions_pred_pc": ["above a black and white image of a piece of furniture", "above a black and white photograph of a sculpture"], "captions_pred_image": ["an airplane flying in the sky with the sun shining behind it", "a black and white 3d model of a building"], "question": "which entity is a cube?", "label": 1}, {"captions": [" a small building with a roof.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["a3089017e4c5463d852de65abf61eda5", "c3a82df41875402285608ef13a55df57"], "properties": ["roof, building, small", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["in 15 words or less a black and white image of a building with dots all over it", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a black and white image of a small house", "a white plastic object on a gray background"], "question": "which object is white", "label": 
1}, {"captions": [" a house featuring a wooden roof structure with trusses and beams.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["97e000ff41094665afd94ea565da8b13", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["roof, material, wood", "house, roof, wooden"], "captions_pred_pc": ["a black and white drawing of a floor plan", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of the roof of a building", "a black and white photograph of a birdhouse"], "question": "which house has a wooden roof", "label": 0}, {"captions": [" of white spheres resembling a molecule.", " a flying bird, resembling a crow and a pigeon."], "sample_ids": ["9d2c94d03ca745948b8cb4e8cafddb1c", "5ec78c8b6ab54f739adb0b46d216a454"], "properties": ["color, shape, number", "bird, resembles, crow, pigeon"], "captions_pred_pc": ["of a black and white 3d model of a molecule on a white background a black and white 3d model of a molecule on a white background royalty free illustration", "above a black and white illustration of an airplane on a white background"], "captions_pred_image": ["a 3d sculpture of a white ball on a gray background", "a black and white image of a bird in flight"], "question": "which entity is a bird?", "label": 1}, {"captions": [" a house with a pink roof, truss, and a square white ceiling lamp.", "a low poly of a plant on a white object, resembling a paper or plastic bag."], "sample_ids": ["91b2e9e4660946f5b4808a18b5323b69", "d49d8ed244094349a99e4faca05e0690"], "properties": ["roof, truss, lamp", "low poly, plant, white"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house with a metal roof", "a 3d model of a plant growing out of a 
rock"], "question": "which object is white", "label": 1}, {"captions": [" a small white box with a shelf and a hole in it.", " a four-legged metal workbench with shelves."], "sample_ids": ["d023ae78bc5a436eaba13c5ecdd45c56", "e93b633d477942d9b79ef8ab566473d6"], "properties": ["a, hole, shelf", "Four legs, Metal, Shelf"], "captions_pred_pc": ["a black and white drawing of a dotted square on a white background", "for a black and white illustration of a cross"], "captions_pred_image": ["a 3d model of a white box on a gray background", "a 3d model of a table with four legs"], "question": "which object has more shelves", "label": 1}, {"captions": ["low poly of a blue ice cube, resembling a pear-shaped sphere with a flame-like structure.", "a white 3d printed mickey mouse dice with various numbers and symbols on it."], "sample_ids": ["6813afb531d041e48532088d01b00db9", "e2645ac544844f3c981203134a99c30c"], "properties": ["- color is blue- shape is pear-like- structure is flame-like", "- material is plastic- shape is dice- color is white"], "captions_pred_pc": ["a black and white drawing of a starfish on a white background starfish on a white background royalty free illustration", "a circle of dots with the number 2 in the center"], "captions_pred_image": ["a low poly 3d model of a rock", "a 3d printed white dice with a mickey mouse face"], "question": "which object is made of plastic", "label": 1}, {"captions": [" of a black and white coffee table with a laptop, featuring a performance studio and ceiling grid structure.", " of a house with a roof truss, chimney, and suspended ceiling."], "sample_ids": ["a177693cc8c7428292680816001b48c6", "9401dfc901b2447a9c0eb27da56854d7"], "properties": ["black, white, laptop", "roof truss, chimney, suspended ceiling"], "captions_pred_pc": ["a black and white drawing of a room with dots on the floor", "in 15 words or less a black and white illustration of a bird in a cage on a white background illustration of a bird in a cage on a white 
background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a"], "captions_pred_image": ["a dishwasher with a dish inside it", "a 3d model of a house with a roof"], "question": "which entity has a roof truss", "label": 1}, {"captions": ["a wooden-cased radio.", " a small white table with stairs and a ladder, featuring a black and white kitchen hood and a black square ceiling light."], "sample_ids": ["c79d1be9b9a0478993bee32c63231a88", "5565c16f297e405f9d5dbf0ebb623605"], "properties": ["case, material, wood", "table, stairs, ladder"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a computer screen", "above a black and white photograph of a small square in the center of the image"], "captions_pred_image": ["3d model of a vintage radio 3d model of a vintage radio", "a 3d model of a table with a stool on top"], "question": "which object has a ladder?", "label": 1}, {"captions": [" a room featuring a wall with a painting, a hole, and a door.", " a broken bowl with a hole in it."], "sample_ids": ["1d1328346a464d2482463d6d5288e934", "55e883c09dbf4f22bee0da608128f4f8"], "properties": ["painting, door, wall", "a, 
material, bowl"], "captions_pred_pc": ["in one hundred words or less an illustration of an igloo on a white background stock illustration", "a black and white drawing of a bowl on a white background"], "captions_pred_image": ["a black and white photograph of a torn piece of paper in the shape of a bird", "a 3d image of a white bowl on a gray background"], "question": "which object is made of a material that can be broken?", "label": 0}, {"captions": [" a leather recliner chair and ottoman set, featuring swivel functionality and available in modern orange, tan, and brown colors.", " a green mountain with trees and grass."], "sample_ids": ["44be138ae8e2409bbbca44a96fc67d45", "08fb23bdc67b4b0ba5fc64ea5c97e5f7"], "properties": ["color, tan, brown, orange", "mountain, grass, tree"], "captions_pred_pc": ["above a black and white illustration of an office chair", "in 15 words or less a black ink blot on a white background royalty free illustration"], "captions_pred_image": ["a grey leather lounge chair with ottoman and footstool", "a 3d model of a mountain with snow on it"], "question": "which entity has more grass", "label": 1}, {"captions": [" a small white house with a staircase and a window.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["9eb88d17310d42dda9e17883e9922525", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["house, staircase, window", "house, table, chair"], "captions_pred_pc": ["a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d rendering of a small room with a staircase", "a 3d rendering of a small white table with a chair"], "question": "which house has a table and chair?", "label": 1}, {"captions": [" a multicolored metal building structure with a roof.", " a white, curved plastic object 
resembling an ear, mouth guard, or hat."], "sample_ids": ["22483891fd124baca3bbc6a6a49adc9c", "c3a82df41875402285608ef13a55df57"], "properties": ["color, roof, structure", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a barn structure", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a wooden door with a lock, handle, and a piece of paper on it.", " a large metal building with a roof and truss structure."], "sample_ids": ["78762b19b7dc4823a0033ec63f092ca5", "b85a99699ccd4bcba213322113bb253d"], "properties": ["door, lock, handle", "roof, truss, structure"], "captions_pred_pc": ["in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration", "of a metal grate on a white background"], "captions_pred_image": ["a black and white image of a door with a crack in it", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a small toy bomb with a blue sphere and two legs.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["c8ea472ef90f4df4ad1d98e02e1e4dfe", "a17477b445b3443189dad22f768b888b"], "properties": ["color, shape, material", "roof, pillar, stairs"], "captions_pred_pc": ["of a black and white image of a glass teapot", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a small, white ball", "a 3d model of a small building with a balcony"], "question": "which entity is a building?", "label": 1}, {"captions": [" a small house with a tree, pool, and pond in a green and blue landscape.", " a snowy city with buildings and a plane flying overhead."], "sample_ids": ["e22489ba182f45cb81f0a83f22abe9bd", "cc63ceb2b5e84872a1a1f6423de419e2"], "properties": ["house, tree, pool", 
"building, plane, city"], "captions_pred_pc": ["in 15 words or less a black and white image of a square with dots around it", "a black and white photo of an airplane on a white background"], "captions_pred_image": ["a 3d model of a house and a tree in a box royalty free 3d model preview no.3", "a 3d model of a city in black and white"], "question": "which entity has a plane flying over it", "label": 1}, {"captions": [" of a wooden table with two barrels.", " of a wooden table and chair."], "sample_ids": ["8bba7567253040d88973320107e48055", "884d232f9cd54b6c8c41b6bf357c704f"], "properties": ["table, barrels, wood", "table, chair, wood"], "captions_pred_pc": ["in 15 words or less a black circle on a white background", "a black and white pattern on a white background"], "captions_pred_image": ["a 3d model of a barrel table royalty free 3d model preview no.2", "a 3d model of a small wooden table"], "question": "which table is made of wood", "label": 1}, {"captions": [" a small house with stairs and a roof.", " a bee and a rubik's cube next to each other."], "sample_ids": ["e9305c80010f4e3b9de9789f01a9bee5", "83a27b2b104e4f7f8b42c3c8654153db"], "properties": ["roof, stairs, house", "bee, color, black and white"], "captions_pred_pc": ["above a black and white image of a square with dots all over it", "a black and white pattern of dots on a white background a black and white pattern of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a podium on a wooden floor", "a black and white image of a bee next to a cube"], "question": "which entity is black and white", "label": 1}, {"captions": [" a white hand, ear bud, teddy bear, earpiece, and utensil.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], "sample_ids": ["1c59287d496f4da6b245a01d25a7e2a4", "d81d13362ae04371bb2cba46e4939665"], "properties": ["earbud, earpiece, earpiece", "hat, bow, arrow"], "captions_pred_pc": ["a black and white 
drawing of a hand holding a cup", "above a black and white photo of an ice sculpture"], "captions_pred_image": ["a white object on a grey background", "a sculpture of an african man sitting on a pedestal"], "question": "which entity has a hat?", "label": 1}, {"captions": [" of a plague mask with a rusty, horned, wooden helmet and a crow's head design.", " a clay pot with holes in it."], "sample_ids": ["2b0896f810074399a5ae7d6dbab8c330", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["- material is wood, rusty, horned", "hole, material, clay"], "captions_pred_pc": ["in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["3d model of a plague doctor's mask", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": [" a tree with a blue ball and a mushroom.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["4bb8528bd042471f8865cce122a03924", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["a, ball, mushroom", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white illustration of an airplane flying over a tree", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a tree with a ball on top of it royalty free 3d model preview no.2", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a white and silver human torso sculpture with rocks.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], 
"sample_ids": ["3978258c3f26401681c6e44b404e2cca", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["color, material, texture", "building, room, sky"], "captions_pred_pc": ["above a black and white drawing of a skull", "above a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a 3d sculpture of a rock formation on a white background", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a room?", "label": 1}, {"captions": [" a white rocket ship.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["22137b9fff744310ad3b4abe6d869718", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["color, shape, size", "- material is wood, rusty, horned"], "captions_pred_pc": ["above a black and white illustration of a planet", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a rocket ship royalty free 3d model preview no.1", "3d model of a plague doctor's mask"], "question": "which object is made of wood", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["7907916e8f524df5b16eb566497db83e", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["color, roof, tray", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white image of a metal object", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" of a meat skewer with a small piece of bread and a sausage on a stick.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": 
["1728f2cb8eca4080af02b22262ff45d5", "c3a82df41875402285608ef13a55df57"], "properties": ["meat, bread, sausage", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of a brush on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["an image of a small white object on a gray background", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " a small house with a roof and door, resembling a shack or shed."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "f1b557775310478893242180defa4d80"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "shack, roof, door"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "a black and white illustration of a telephone on a white background"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 
3d", "a 3d model of a small house in the middle of a field"], "question": "which entity is a shack?", "label": 1}, {"captions": [" a building with wooden and steel structures, featuring stairs, railings, and a ceiling with numerous pipes.", " a house with a blue roof, chimney, and wooden-beamed ceiling."], "sample_ids": ["26c47880756b4876b4f263373c3c5303", "b380dd4800124a8d96424a504eb0ec6a"], "properties": ["building, material, steel", "roof, color, blue"], "captions_pred_pc": ["a black and white drawing of a floor plan", "of a white lace clutch purse on a white background"], "captions_pred_image": ["a 3d model of a large structure with multiple levels", "a 3d model of a building with many windows"], "question": "which building has a blue roof", "label": 1}, {"captions": [" a house with a yard, trees, bushes, and surrounding buildings.", " a large steel building with a pool."], "sample_ids": ["7f8942ef51dd4246993a587a12df168c", "72eed67d8d884c819b28e2e95eb48f06"], "properties": ["house, yard, surrounding buildings", "building material, pool, steel"], "captions_pred_pc": ["a black and white image of a truck on a white background", "a black and white illustration of a building with a tree growing out of it"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a concrete structure"], "question": "which building has a pool", "label": 1}, {"captions": [" a house featuring a purple roof and a suspended ceiling with a light fixture.", " a house with a roof structure and toothbrushes."], "sample_ids": ["579b43057ef74bd08d06bdd3e8d973a0", "7632d1ba4e8144c19484c263b6074d0c"], "properties": ["roof, purple, suspended", "house, roof, toothbrushes"], "captions_pred_pc": ["a black and white image of a piece of paper with a pattern of dots on it", "a black and white illustration of the letter 'b' isolated on a white background illustration"], "captions_pred_image": ["a 3d model of the roof of a building royalty free 3d model no.", "a 3d 
rendering of a white box with a lot of blades"], "question": "which house has a roof structure", "label": 1}, {"captions": [" featuring a pink and white dress, a purple and white vase, a box, and a coffee cup with a purple flower.", "3d white playground set with slides and swings, featuring a plane, building, and dragon in the sky."], "sample_ids": ["ec2de6c604e44e6782ffab0c46daf33b", "e694d53545d449319a64cceb0280c3c6"], "properties": ["a, dress, flower", "3d, slide, swing"], "captions_pred_pc": ["a black and white drawing of a tea bag in the shape of a flower on a white background a black and white drawing of a tea bag in the shape of a flower on a white background royalty free illustration", "for a 3d model of the letter 'j'"], "captions_pred_image": ["a 3d model of a vase and a book next to each other", "a 3d model of a playground slide"], "question": "which entity has a plane", "label": 1}, {"captions": [" a pink-framed building structure with beams and trusses.", "a white of a man with arms outstretched."], "sample_ids": ["18e392c5360146eda498c5edab25b15c", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["frame, beams, trusses", "image, color, white"], "captions_pred_pc": ["a black and white drawing of a metal grate", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d model of a man with his arms outstretched"], "question": "which entity is a picture of a man?", "label": 1}, {"captions": ["a 3d printed green robot with two arms and two legs.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["04e3e8ce541e487b9e342570fe1b4eb2", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["color is green, material is 3d printed, number of limbs is 2", "box, handle, gun"], "captions_pred_pc": ["a black and white image of a robot in the shape of a snowflake", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d 
model of a futuristic robot standing on its hind legs", "a 3d rendering of a metal box with a handle"], "question": "which object is made of metal", "label": 1}, {"captions": [" a snowy small village with farm buildings and a fence.", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["6bb669534ccc434f9ab4d7b39bae3510", "09f2cf267e954c958828325067bcc36a"], "properties": ["building, fence, snowy", "island, terrain, rocks"], "captions_pred_pc": ["a black and white drawing of a boat on the water", "above a black and white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d model of a small village in the snow royalty free 3d model preview no. 3", "a black and white image of a piece of dirt on the ground"], "question": "which entity has more diverse terrain", "label": 1}, {"captions": ["a white of a building with columns, stairs, and railings.", " a clay pot with holes in it."], "sample_ids": ["c9ad30f336844b629cb237fa5b0d94f2", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["image, building, stairs", "hole, material, clay"], "captions_pred_pc": ["a black and white image of dots on a white background a black and white image of dots on a white background royalty free illustration", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a multi-level building with stairs and balconies royalty-free 3d model no.", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": [" a room featuring a table and chairs, with blue and green walls.", " a small house on an island with a boat and a bird on a rock."], "sample_ids": ["a49899d9a6194583b745e02f3654841e", "1e56a92a0ddc41e59694bd1ad1656149"], "properties": ["color, table, chairs", "house, 
rock, bird"], "captions_pred_pc": ["a close-up of a white object on a white background", "a black and white drawing of a boat in the middle of a body of water"], "captions_pred_image": ["a 3d rendering of a room with a white wall", "a 3d rendering of a house on a rock"], "question": "which entity has a bird on a rock?", "label": 1}, {"captions": [" featuring a chair, table, and refrigerator.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["chair, table, refrigerator", "house, tree, hill"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "a 3d model of a house in the middle of a field"], "question": "which entity is situated on a hill", "label": 1}, {"captions": [" a clear glass table with metal legs and balls on top.", " of a small white building with stairs and a lid."], "sample_ids": ["7c2bfa826f274377ac21f48d510848c3", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["glass, metal, balls", "building, stairs, lid"], "captions_pred_pc": ["a black and white image of a wine glass", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a clear acrylic foosball table", "a 3d model of a white box on a gray background"], "question": "which entity has a lid", "label": 1}, {"captions": [" of a mushroom cloud with white tree and coral plant elements, featuring blue and green leaves.", " a large house with a roof on a platform."], "sample_ids": ["bca5233d878e4cf09b5bc2bb6f3915b0", "cb3e09a301b746918a682a595037c7f7"], "properties": ["color, shape, texture", "roof, platform, house"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white image of a piece of paper"], 
"captions_pred_image": ["a 3d model of a tree made out of white flowers", "a 3d model of a small house"], "question": "which entity has a roof", "label": 1}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["color, red, blue, structure", "island, mountain, grass"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": ["a golden padlock with numbers and the word \"master\" on it.", " of a wine corkscrew"], "sample_ids": ["006d1922549f4f83a87158c46c8f8ea8", "07047b273add4f6fb2075fd176a50cd9"], "properties": ["size, material, color", "a, type, corkscrew"], "captions_pred_pc": ["of a padlock on a white background padlock on a white background stock illustration", "a black and white image of a corkscrew"], "captions_pred_image": ["a 3d model of a padlock with numbers on it", "a 3d model of a wine bottle opener"], "question": "which object is a type of corkscrew", "label": 1}, {"captions": [" a house featuring a pink roof with purple tiles, wooden beam ceilings, and interior elements including a bed with a purple frame and a table with purple slats.", " of a white chest of drawers with legs."], "sample_ids": ["09561cc68a84496bb14b75c0f516f089", "f00dfa8b5e7e4fc6bbf97d718b66f390"], "properties": ["roof, color, purple, beams, wooden, bed, frame, table, slats, purple", "chest of drawers, legs, white"], "captions_pred_pc": ["a black and white image of a square with a pattern of dots", "of a black and white leopard print rug"], "captions_pred_image": ["a 3d model of a table with a grid pattern", "a 3d rendering 
of a white dresser"], "question": "which object has legs", "label": 1}, {"captions": ["a 3d white arrow, letter s, toothpick, skateboard, and knife forming a logo.", " a long row of steel shelves in a warehouse, featuring a suspended scaffolding system."], "sample_ids": ["2ee9dcd863514073a849ece8ea7714dd", "578fe7a7bd754b889be33aea99cf5050"], "properties": ["3D, toothpick, skateboard", "a, material, steel"], "captions_pred_pc": ["above a black and white image of a person's hand holding a pencil", "above a black and white image of a rack with multiple shelves"], "captions_pred_image": ["a white toothbrush on a gray background", "a 3d model of a large metal structure"], "question": "which object is made of steel", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["hat, sword, gun", "house, table, chair"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d rendering of a small white table with a chair"], "question": "which entity has a table and chair?", "label": 1}, {"captions": [" of a set of ladders.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["5152e8c6dd094e49b02b54ef1cd2fabc", "06a1c233fb444830b577aa06e2c01294"], "properties": ["a, set, ladders", "house, tree, hill"], "captions_pred_pc": ["above a white wall with a black square on it", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of a shelving unit with multiple 
shelves", "a black and white image of a house in the middle of a field"], "question": "which entity has a house on a hill?", "label": 1}, {"captions": [" of a white building with a small house and a desk with a laptop.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["9244a2d3a9e94c8398ef991f1661bb58", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["a, desk, laptop", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white image of a piece of furniture", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of an office desk on a white background", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a small white box with a shelf and a hole in it.", " of a white building with a small house and a desk with a laptop."], "sample_ids": ["d023ae78bc5a436eaba13c5ecdd45c56", "9244a2d3a9e94c8398ef991f1661bb58"], "properties": ["a, hole, shelf", "a, desk, laptop"], "captions_pred_pc": ["a black and white drawing of a dotted square on a white background", "a black and white image of a piece of furniture"], "captions_pred_image": ["a 3d model of a white box on a gray background", "a 3d model of an office desk on a white background"], "question": "which entity has a laptop?", "label": 1}, {"captions": ["a featuring a rock formation with various statues, including a woman, an eagle, and elements like wood and a shell.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["36d90269173b4d1a84dbd61664593f66", "c3a82df41875402285608ef13a55df57"], "properties": ["a, eagle, wood", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white illustration of a map with dots all over it", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a person sitting on the edge of 
a cliff", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a large house/building structure with a roof.", " a house with a blue roof, chimney, and wooden-beamed ceiling."], "sample_ids": ["82859e4c6d4e4bbea94b6252bef1d398", "b380dd4800124a8d96424a504eb0ec6a"], "properties": ["roof, structure, house", "roof, color, blue"], "captions_pred_pc": ["a black and white photograph of a metal sculpture", "of a white lace clutch purse on a white background"], "captions_pred_image": ["a 3d model of a large white structure", "a 3d model of a building with many windows"], "question": "which structure has a roof", "label": 1}, {"captions": [" of a gray stereo system with a blue drawer, resembling a printer and computer with a blue screen, featuring a cd player.", " a large metal building with a roof and truss structure."], "sample_ids": ["3d4d965e67744415b69ff6aaeb11f420", "b85a99699ccd4bcba213322113bb253d"], "properties": ["color, screen, drawer", "roof, truss, structure"], "captions_pred_pc": ["above a black and white image of a brush", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a printer on a white background royalty free 3d model no.2", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": ["white of a small stool and toilet paper holder.", " a flying bird, resembling a crow and a pigeon."], "sample_ids": ["b7b32b690a2942878e3bd386de75d29d", "5ec78c8b6ab54f739adb0b46d216a454"], "properties": ["color, white, stool", "bird, resembles, crow, pigeon"], "captions_pred_pc": ["a black and white illustration of an object in the shape of an open umbrella", "above a black and white illustration of an airplane on a white background"], "captions_pred_image": ["a 3d model of a toilet paper holder royalty free 3d model preview no. 
1", "a black and white image of a bird in flight"], "question": "which entity is a bird?", "label": 1}, {"captions": [" featuring a table with objects, a stone wall with a ball, and a teapot and vase on a tiled floor.", " of a character wearing glasses and a hat."], "sample_ids": ["654c49e07bb54e6b94637b5f7b65bf08", "032e7ce682ff43d5aa5ca4fd34eacf14"], "properties": ["floor, table, wall", "hat, glasses, character"], "captions_pred_pc": ["a black and white illustration of a coffee cup, a spoon, and a knife on a black background", "in 15 words or less a silhouette of a bell on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a bench on a tiled floor royalty free 3d model preview no.3", "a 3d model of a person wearing sunglasses"], "question": "which entity has a hat?", "label": 1}, {"captions": [" of a wooden cabinet with drawers and filing features.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], "sample_ids": ["abbc90bbd5474f73b16482ccd10e07ec", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["Cabinet, Drawers, Filing", "building, room, sky"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "above a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a 3d rendering of a white wooden chest of drawers", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a room?", "label": 1}, {"captions": [" a room featuring a wall with a painting, a hole, and a door.", " a house with a flat roof structure."], "sample_ids": ["1d1328346a464d2482463d6d5288e934", "abc52d210d71415296730bb00352ce6f"], "properties": ["painting, door, wall", "roof, flat, structure"], "captions_pred_pc": ["in one hundred words or less an illustration of an igloo on a white background stock illustration", "a black and white drawing of a window with dots around it"], 
"captions_pred_image": ["a black and white photograph of a torn piece of paper in the shape of a bird", "a 3d model of a house with a roof"], "question": "which structure has a flat roof", "label": 1}, {"captions": ["a 3d white object featuring stacked racks, toy train, blocks, plastic pipes, lego pieces, and clothes hangers.", " a house with a roof and beams."], "sample_ids": ["c9c786f133f54d5e8d99bfc1a588df41", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["stacked, racks, toy train", "roof, beams, house"], "captions_pred_pc": ["a black and white photo of a person standing in front of a white wall", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a ship's propeller on a white background", "a 3d model of a building with a roof"], "question": "which object has a roof?", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, white, black, white", "island, terrain, water"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": ["a 3d white box with an open door and lid.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["4e95f0eca97f48d6af1888a8bacec9f6", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["- color is white - shape is box - material is plastic", 
"roof, trusses, beams"], "captions_pred_pc": ["a black and white square with dots all over it", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d rendering of a white box with an open lid", "a 3d model of a roof structure"], "question": "which entity is made of wood", "label": 1}, {"captions": [" of a mannequin head wearing a leather plague mask with straps.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["7821b30c2f8545ccac3e8b8a305d5082", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["mannequin head, plague mask, straps", "island, terrain, water"], "captions_pred_pc": ["above a black and white image of a person's face with dots", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a plague doctor's mask on a mannequin head", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a pink and gold robotic woman's hand with purple flowers.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["accfe13a2bd448828e45c790118e1b7d", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["color, hand, flowers", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white illustration of a hand", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a human leg with a broken ankle royalty-free 3d model preview", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": ["a pile of mint green soap cubes.", " a small white house with windows and a black lid."], "sample_ids": ["edd35e0657b640b1b8fcf86942e1a9e5", "4b40af369c1149949f5ccb68becd8430"], "properties": ["color, mint, soap", "white, windows, lid"], "captions_pred_pc": ["a black and white illustration of a group of dots on a white background", "above a 
black and white image of dots on a white background"], "captions_pred_image": ["a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background", "a 3d model of a white house with three windows"], "question": "which object has a lid", "label": 1}, {"captions": [" a large building featuring stairs, a clock tower, a balcony, and a roof.", " a black building."], "sample_ids": ["e7c78316f9cb4b8aad57a9c933f5278b", "88702656e9684e1ea1a01dc7075c00e0"], "properties": ["building, roof, balcony", "color, black, building"], "captions_pred_pc": ["a black and white illustration of a group of dots in the shape of a square on a white background royalty free illustration", "a close up of a black and white rug on a white background"], "captions_pred_image": ["a 3d model of a building with a clock tower", "a black 3d model of a house on top of a blueprint"], "question": "which building is black", "label": 1}, {"captions": [" of a multi-colored spooling machine with wires and circuit board, featuring a lamp and shelf with toys.", " a flying bird, resembling a crow and a pigeon."], "sample_ids": ["0e3f5cc16806492b948d41a748819ce3", 
"5ec78c8b6ab54f739adb0b46d216a454"], "properties": ["color, shape, material", "bird, resembles, crow, pigeon"], "captions_pred_pc": ["a black and white image of a decorative tile", "above a black and white illustration of an airplane on a white background"], "captions_pred_image": ["a 3d rendering of an electronic device on a white surface", "a black and white image of a bird in flight"], "question": "which entity is a bird?", "label": 1}, {"captions": [" of a hammer with a metal handle, compatible with 3ds max, maya, blender, and other ing and animation software.", " a rusted metal barrel with a yellow and red warning sign and stripe on it."], "sample_ids": ["5d0b9c038a1847f29384ec09cffc43d9", "5a49ad82ef7a4d33badea2261720f518"], "properties": ["- material is metal - weight is 0 - height is 0", "rusty, warning, metal"], "captions_pred_pc": ["a black and white illustration of a person flying a kite", "a black and white drawing of dots on a white background a black and white drawing of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a metal clamp with a spike on top of it", "a black and white photograph of a barrel"], "question": "which entity is made of metal", "label": 1}, {"captions": [" of a blue tarp, flower, small island with a boat and phone, and a tent with a blue blanket.", " a house with a green, wooden-structured roof."], "sample_ids": ["94704d86c22c4bdfb86ac24979926066", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["blue tarp, flower, small island", "roof, color, green"], "captions_pred_pc": ["above a 3d rendering of a fish in the air", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a black and white image of a piece of paper on the ground", "a 3d model of a house with a triangular roof"], "question": "which roof is green", "label": 1}, {"captions": [" a snowy small village with farm buildings and a fence.", " a child's room in a small 
house with windows."], "sample_ids": ["6bb669534ccc434f9ab4d7b39bae3510", "88847a6445044bcbab9611e6028a19b9"], "properties": ["building, fence, snowy", "room, house, windows"], "captions_pred_pc": ["a black and white drawing of a boat on the water", "for a black and white drawing of a snowflake"], "captions_pred_image": ["a 3d model of a small village in the snow royalty free 3d model preview no. 3", "a 3d model of a child's room with toys and furniture in it royalty free 3d model preview no.1"], "question": "which entity has more windows", "label": 1}, {"captions": [" a modern white house with furniture and a black accent.", " a house with wooden framing and trusses."], "sample_ids": ["e5025a1ca0034b1aa97a0d42edeeae0f", "4501794e257c4a8ba60a94757d8e93a9"], "properties": ["color, white, furniture, black", "frame, trusses, wood"], "captions_pred_pc": ["above a black and white drawing of a floor plan", "a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a white building with black and white tiles", "a 3d model of a house under construction"], "question": "which house is made of wood", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a building with a metal and wooden pole structure."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "e2e2ab4474b84f33809979da457eedd9"], "properties": ["yellow, table, roof", "structure, material, pole"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white illustration of a line of dots on a white background"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a structure with multiple tables and chairs"], "question": "which structure is made of metal and wooden poles", "label": 1}, {"captions": [" a small house with a tree and a rock.", " of two rocks with ice 
elements."], "sample_ids": ["9dc392a7f6e444e5bfb720684d6f864a", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["house, tree, rock", "image is a rock with ice elements"], "captions_pred_pc": ["in 15 words or less the image depicts a white square on a white background with black dots all over the square stock illustration", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a small house with a tree in front of it", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a house featuring a pink-purple roof with trusses and wooden ceiling beams.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["b6b6a3f82bdd47c3afaf9af885ba8703", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["roof, trusses, beams", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["a black and white pattern of dots in the shape of a square royalty free illustration", "a black and white image of a circular object on a white background"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": [" a brick building with a roof structure and roof truss.", " a potted christmas pine tree."], "sample_ids": ["84e8acad28664a738df69d719df9e263", "460c8f3034a844159826fac3b8aa35a5"], "properties": ["roof, structure, truss", "a, color, green"], "captions_pred_pc": ["a black and white polka dots pattern on a white background polka dots pattern on a white background illustration", "a black and white illustration of a snowflake on a white background"], "captions_pred_image": ["a 3d model of a brick building with a roof", "a 3d model of a christmas tree in a vase"], "question": "which object is green", "label": 1}, {"captions": 
["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", " a large house with a roof on a platform."], "sample_ids": ["db19e46828c94b1a8b9a6ca9f673c604", "cb3e09a301b746918a682a595037c7f7"], "properties": ["a, roof, soup", "roof, platform, house"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a bowl and chopsticks on a sheet of paper", "a 3d model of a small house"], "question": "which entity has a roof", "label": 1}, {"captions": ["a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it.", " a concrete wall with peeling paint and rusted metal features."], "sample_ids": ["9bcb7cc44b444326bc426cd9e2aacf60", "4376831ff557462dbacc4cce88a8cc86"], "properties": ["- material is plastic- color is green- shape is box", "paint, rust, concrete"], "captions_pred_pc": ["a black and white illustration of a toilet brush and toilet brush holder", "above a black and white image of a shelf on a white background"], "captions_pred_image": ["a 3d rendering of a plastic box with several compartments", "a 3d model of a concrete wall"], "question": "which object is made of rust", "label": 1}, {"captions": [" of a white round table with a red accent.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["fea62a865b4e40899d95785533818329", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["color, white, red", "throne, stairs, tree"], "captions_pred_pc": ["above a black and white photograph of a person hanging from the ceiling", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["an empty white plate on a gray background", "a 3d model of a throne with a 
tree on it"], "question": "which entity has more stairs", "label": 1}, {"captions": [" of a small island featuring a white lighthouse, a fountain, and a grassy crater.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["2a30e69498ff4fd1a33c1fb72286f553", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["lighthouse, fountain, crater", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black beanie with sparkles on a white background", "for a black and white image of an object on a white background"], "captions_pred_image": ["a black and white image of an object on top of a pedestal", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker", "label": 1}, {"captions": [" a small white archway structure resembling a building.", "a 3d object featuring a fish with a long nose and open mouth, a cat head with red, yellow, and green stripes, a frog with a long snout, a white horse with green and yellow stripes, and a wolf mask."], "sample_ids": ["5ad02458cf394134a902e25001d2ffef", "276699bb0f974c47b4e2954cfcd1651c"], "properties": ["structure, building, archway", "a, color, white"], "captions_pred_pc": ["for a black and white illustration of a castle on a hill", "a black and white image of a skull in the shape of a butterfly"], "captions_pred_image": ["a 3d rendering of a white object on a white surface", "a 3d model of an animal with a long nose"], "question": "which entity is not a building?", "label": 0}, {"captions": ["a small white wooden castle with flags, turrets, and two towers.", " a house with a flat roof structure."], "sample_ids": ["345b1c7d2d9b4524aa0dcdc3cd27e4da", "abc52d210d71415296730bb00352ce6f"], "properties": ["turrets, flags, towers", "roof, flat, structure"], "captions_pred_pc": ["a black and white illustration of a square made up of many small dots on a white background", "a black and white drawing of a window with dots around it"], 
"captions_pred_image": ["a 3d model of a castle with towers and a drawbridge royalty-free 3d model preview no.1", "a 3d model of a house with a roof"], "question": "which structure has a flat roof", "label": 1}, {"captions": [" a concrete wall with peeling paint and rusted metal features.", " of two rocks with ice elements."], "sample_ids": ["4376831ff557462dbacc4cce88a8cc86", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["paint, rust, concrete", "image is a rock with ice elements"], "captions_pred_pc": ["above a black and white image of a shelf on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a concrete wall", "a 3d image of two rocks on a gray surface"], "question": "which entity is a rock?", "label": 1}, {"captions": ["a featuring a tree stump, mossy wood, leaves, and a rock with grass.", " a red mushroom on top of various piles, including dirt, leaves, wood, and garbage."], "sample_ids": ["2527cd763a1a43f9870eb65e44e79f7d", "21d3bd3978f74308a312b4379e665dbd"], "properties": ["mossy, rock, grass", "red, pile, garbage"], "captions_pred_pc": ["a black and white image of a person on a skateboard", "above a black and white illustration of a triangle"], "captions_pred_image": ["a 3d model of a piece of wood on a white background", "a black and white image of a mushroom in a pile of leaves"], "question": "which entity has a mushroom on top of various piles", "label": 1}, {"captions": [" a small white barn with a metal roof.", "a white of a spaceship and building."], "sample_ids": ["4ca3342a96824684845f7d0e062ab176", "bf7d4277c9184d35abdec85bd5e25956"], "properties": ["roof, metal, white", "image, building, spaceship"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a house made of dots", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a barn", "a 3d model of a white object on a gray background"], "question": "which image shows 
a spaceship and building?", "label": 1}, {"captions": [" of a white chair with arms and legs.", " a small purple plastic chair with holes."], "sample_ids": ["61ec56afad7a45deb99ccf1ab1bd2d73", "fe2bf0f8f5c64dd6bac3e2da0d1b89d0"], "properties": ["Arms, Legs, Color", "color, plastic, purple"], "captions_pred_pc": ["of a 3d rendering of a chair in the style of the 1920s and 1930s", "this image may contain clothing apparel accessory purse bag and handbag"], "captions_pred_image": ["a 3d model of a white outdoor chair royalty free preview no 3", "3d model of a chair royalty free 3d model preview no 3"], "question": "which chair is made of plastic", "label": 1}, {"captions": ["a white plastic bottle with a lid and cap.", "a small white bowl with a light blue glaze and blue rim."], "sample_ids": ["f7e60e3a8ee84ad0954d288c3f1a7220", "6f7201fbb58649379398a8d1d5c0cc7a"], "properties": ["color is white, material is plastic, shape is bottle", "color, blue, rim, blue"], "captions_pred_pc": ["of a black circular object on a white background", "a black and white drawing of a dotted circle on a white background"], "captions_pred_image": ["a 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a grey 
background 3d printed bottle on a grey background 3d printed bottle on a grey background 3d printed bottle on a", "a white bowl on a gray background"], "question": "which object is made of ceramic?", "label": 1}, {"captions": [" of a rock formation with a white cliff and a rock.", " of a stone wall with a window and multiple stone arches."], "sample_ids": ["4a25f6dfbea943bca137dacd2f7b984f", "db74ee1621464be1b164be26a1af050e"], "properties": ["image is rock formation with a white cliff and a rock", "window, arches, wall"], "captions_pred_pc": ["above a black and white map of spain on a white background", "a black and white illustration of a bolt and nut on a white background a black and white illustration of a bolt and nut on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a rock formation on a gray background", "a 3d model of an old brick wall"], "question": "which entity is a building?", "label": 1}, {"captions": [" a church building at night with a projection, snow, and a window, featuring a hand with a skeleton on it.", " a house with a roof and beams."], "sample_ids": ["4389909667db472ea9a57a70eb940fa4", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["building, window, skeleton", "roof, beams, house"], "captions_pred_pc": ["a black and white image of a piece of paper", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a black and white photo of a person's reflection in a mirror", "a 3d model of a building with a roof"], "question": "which building has a roof and beams", "label": 1}, {"captions": [" of a wooden windmill with a red roof.", " a white building with a red roof."], "sample_ids": ["2ad8fca30285483d8b7f602fa078215d", "1f9580be397d4f948bf53fe1d5bc5756"], "properties": ["roof, color, red", "color, white, roof, red"], "captions_pred_pc": ["a white and black image of a snowflake in the shape of a snowflake", "in 15 words or fewer 
a black and white drawing of a camera"], "captions_pred_image": ["a 3d model of a windmill on a gray background", "a 3d model of an office building"], "question": "which building has a red roof", "label": 1}, {"captions": [" a yellow monster-sphere with teeth.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["fc5f906bab2c4c36a406ebdc15f58541", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, shape, texture", "island, terrain, water"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background dandelion illustration on a white background illustration", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a white ball with teeth", "a 3d image of a small island in the middle of a lake"], "question": "which entity has a rocky terrain", "label": 1}, {"captions": ["a 3d-printed blue ring with holes in it.", " a colorful, wire-framed building structure resembling a cube."], "sample_ids": ["9adb0b6d88ec466d8df8c84ead5186a1", "62b7c7c684044d998fee9ff35beeb79b"], "properties": ["color, material, shape", "color, frame, shape"], "captions_pred_pc": ["a black and white image of a leopard print design on a white background", "a black and white illustration of a building made up of dots"], "captions_pred_image": ["a 3d rendering of a circular object with multiple holes in it", "a 3d model of a building structure"], "question": "which entity is a cube?", "label": 1}, {"captions": [" of a cup, bottle, and two metal buckets on a chessboard with a square ceiling light fixture.", " of a cup, bottle, and two metal buckets on a chessboard with a square ceiling light fixture."], "sample_ids": ["05b5a5da1a0a4c1fa60a9e5edd5c3424", "05b5a5da1a0a4c1fa60a9e5edd5c3424"], "properties": ["cup, bottle, chessboard", "cup, bottle, chessboard"], "captions_pred_pc": ["a black and white 3d shape made up of small dots on a white background", "a black and white 3d shape made 
up of small dots on a white background"], "captions_pred_image": ["a 3d model of a chess set on a checkered board royalty-free 3d model", "a 3d model of a chess set on a checkered board royalty-free 3d model"], "question": "which entity has a chessboard?", "label": 0}, {"captions": ["3d stone sculpture of a bat", " of a white plastic tube with a hole and a chip on it."], "sample_ids": ["5d5ced90c170408b89f9e5ac7852c1f2", "9968e06a62e8487ea33460e640abc573"], "properties": ["- material is stone - shape is bat", "color is white, material is plastic, shape is tube"], "captions_pred_pc": ["a black and white image of an object on a white background", "a black and white image of a broom on a stand"], "captions_pred_image": ["a 3d image of a stone sculpture in the shape of a bird", "a white object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" of a white sheet of paper or box on a gray background.", " of a torn piece of paper, a hole in a wall, and a guitar with a sign on it, accompanied by a piece of metal, wood, and a large rock."], "sample_ids": ["7b0c1e02d9b14f2fae4f1f7040661cc7", "cc4ccf85d4c1425cb5975b8b5664d38a"], "properties": ["color, white, background, gray", "paper, hole, sign"], "captions_pred_pc": ["above a black and white photograph of an object", "a silhouette of a map of the state of karnataka, india on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a paper airplane", "an image of a torn piece of paper in the shape of a map"], "question": "which entity has a sign on it", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", "a low poly of a plant on a white object, resembling a paper or plastic bag."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "d49d8ed244094349a99e4faca05e0690"], "properties": ["yellow, table, 
roof", "low poly, plant, white"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a plant growing out of a rock"], "question": "which object is white", "label": 1}, {"captions": [" a small island with trees, water, and a river.", " a molecule featuring green, red, and blue spheres."], "sample_ids": ["5a9c593092c04deaa0f17a1c28a79476", "1c0e821eb7c4489dbff9e20d7e8575a3"], "properties": ["water, river, island", "color, sphere, molecule"], "captions_pred_pc": ["in 15 words or less a black and white polka dot pattern on a white background", "a black and white photograph of a group of geometric shapes arranged in the shape of a diamond"], "captions_pred_image": ["a 3d model of a snowy landscape with trees in the foreground royalty free 3d model preview no.2", "a 3d model of a molecule in the shape of a pyramid"], "question": "which entity is a molecule?", "label": 1}, {"captions": [" a red circular object with a checkered pattern, resembling a round pillow or bed cover.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["9cf9fb6d07084488892422a5a5be00ef", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["pattern, color, shape", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black circle on a white background", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d model of a round cushion royalty-free 3d model preview", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a black rock with writing on it.", " a house with a green, 
wooden-structured roof."], "sample_ids": ["f9e35785655f464d8384a8d562de4ba2", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["color, black, writing", "roof, color, green"], "captions_pred_pc": ["a black and white image of a sphere with dots on it", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a black and white photograph of a rock with writing on it", "a 3d model of a house with a triangular roof"], "question": "which object is made of wood", "label": 1}, {"captions": [" a tree with a blue ball and a mushroom.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["4bb8528bd042471f8865cce122a03924", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["a, ball, mushroom", "lizard, rock, stuffed animal"], "captions_pred_pc": ["a black and white illustration of an airplane flying over a tree", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a tree with a ball on top of it royalty free 3d model preview no.2", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": [" of a jacket on a mannequin with arms outstretched.", "a featuring a skeleton, torn paper, long stick, rock, and broken wood."], "sample_ids": ["1dc7708fd7bd4ea1b035f4c48dbd7868", "46903bf029934b1989bc062dcb0a5531"], "properties": ["arm, jacket, mannequin", "skeleton, torn, paper, long stick, rock, broken wood"], "captions_pred_pc": ["a black and white image of a woman's torso", "a close up of a black object on a white background"], "captions_pred_image": ["a 3d model of a woman's jacket royalty free 3d model preview no.2", "a 3d sculpture of a person's hand in the air royalty-free 3d model preview"], "question": "which entity has a 
skeleton?", "label": 1}, {"captions": [" of a green man with arms outstretched, appearing to fly.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["34850e40521940e49cfc27f0f486f544", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["color, shape, size", "house, table, chair"], "captions_pred_pc": ["a black and white illustration of an airplane propeller", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of a man with his arms outstretched royalty free 3d model no.2", "a 3d rendering of a small white table with a chair"], "question": "which entity is a table?", "label": 1}, {"captions": [" of a red steel playground structure with yellow rails, featuring a bench and storage rack.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["91e069e84f754aceb99e28541cf7ae39", "c3a82df41875402285608ef13a55df57"], "properties": ["color, bench, rack", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["of a 3d illustration of a black and white square frame", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a small purple plastic chair with four legs.", "a featuring a running mario character, a jumping black bear, and a person jumping and running in the air, with a bear holding a baseball bat."], "sample_ids": ["1bb40ec897884b788dc0a2dac090f347", "5f46ebb58caa4383b5950c2022d2ffd6"], "properties": ["color is purple, material is plastic, number of legs is four", "a, character, mario"], "captions_pred_pc": ["of a black and white image of a baby carrier", "for a black and white sculpture of a person"], "captions_pred_image": ["a 3d model of a white plastic chair", "a 3d model of a cartoon character jumping in the 
air"], "question": "which entity is a cartoon?", "label": 1}, {"captions": [" a small white building with a door, resembling a box-shaped house.", " a large rock structure with a cave and small hole, resembling a stone sculpture and featuring a white plastic bag."], "sample_ids": ["1b5fe88d0ff149ae9d8b4eb455c5c90c", "120bf1525e8649d9bdf3a593fe8f5ddc"], "properties": ["shape is box, color is white, door is present", "resembles, sculpture, rock"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "a black and white illustration of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a white, open shelving unit", "a 3d model of a bag with a zipper on it"], "question": "which entity is a rock?", "label": 1}, {"captions": [" of a stone wall with writing, metal bars, and a broken piece featuring a metal hook, resembling an egyptian cuneiform tablet.", " of a cherry blossom bonsai tree with pink flowers."], "sample_ids": ["9951e345202c44c19ab1eec11934bc52", "037fff0f153c41ea8b9c9392c2e2439a"], "properties": ["image is a stone wall with writing, metal bars, and a broken piece featuring a metal hook, resembling an egyptian cuneiform tablet", "flower, color, pink"], "captions_pred_pc": ["above a black and white drawing of a pair of binoculars", "for a black and white illustration of a person on a skateboard"], "captions_pred_image": ["a 3d rendering of a stone wall with a metal bracket holding it in place", "a 3d model of a bonsai tree on a pedestal"], "question": "which entity has a pink flower", "label": 1}, {"captions": ["a collection featuring various characters, including a man holding a bat, a person doing yoga, a jumping individual, a person with a stick, a flying bird, a man with outstretched arms, a man in a hat, and a suited man flying through the air.", " a woman in a black dress with wings and shoes, featuring a spider and robot elements."], "sample_ids": ["5409e95467e54cbdaddb17695fe563e6", 
"745fed115c5343dab9d7f0c389c98902"], "properties": ["a, character, person", "costume, color, black"], "captions_pred_pc": ["a 3d model of a person", "a black and white illustration of a bumblebee on a white background"], "captions_pred_image": ["a 3d model of a bird in mid-flight royalty free 3d model preview no. 3", "a 3d model of a person wearing a futuristic costume"], "question": "which costume is black", "label": 1}, {"captions": [" a collection of black and red pens and pipes.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["8a272b7ee0c14a2c98fae74024bf3e93", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, pen, red", "a, material, clay"], "captions_pred_pc": ["of a black leather belt with a silver buckle", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d illustration of a group of pens on a white background royalty free 3d model preview no. 3", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": ["a 3d white object featuring stacked racks, toy train, blocks, plastic pipes, lego pieces, and clothes hangers.", " of a small white building with stairs and a lid."], "sample_ids": ["c9c786f133f54d5e8d99bfc1a588df41", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["stacked, racks, toy train", "building, stairs, lid"], "captions_pred_pc": ["a black and white photo of a person standing in front of a white wall", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a ship's propeller on a white background", "a 3d model of a white box on a gray background"], "question": "which object has a lid?", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a small house on an island with trees, shrubs, a pool, and a lake."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", 
"c8331489fca44685bedfa1bdadf6ccb3"], "properties": ["color, shape, and size", "house, lake, pool"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a black and white image of a pattern on a piece of paper"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a large building"], "question": "which entity has a lake", "label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", " a small, snow-covered house."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "0d00d10b90134dbe9ce7b2b3d6669237"], "properties": ["color, light, jewels", "house, snow, cover"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background royalty free illustration", "in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a piece of broken glass on a white background"], "question": "which entity is covered in snow", "label": 1}, {"captions": [" a white shelf with holes and brackets.", "a white glass beer mug."], "sample_ids": ["8f17016d6c0049fc98c5bfe5e6265740", "1d686cbd3e9a4c629a43088658989286"], "properties": ["color, white, holes", "color, white, glass"], "captions_pred_pc": ["a black and white image of a triangle with dots on it", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a white bench with a shelf on top", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": ["a low poly of a deer, antelope, llama, capybara, and kangaroo.", " a small building with windows and a roof."], "sample_ids": ["8b4c2e3e76524d85a9395ea1169d953e", "0ef2cac27e364c0687afae7ab5040cc3"], "properties": ["low poly, llama, kangaroo", "roof, windows, 
building"], "captions_pred_pc": ["above a black and white image of an animal sculpture", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a 3d low poly animal standing on its hind legs", "a 3d model of an apartment building royalty free 3d model preview no 3"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a white supreme logo t-shirt, low poly design.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["bea8441c08d94366b96b53775391d8e6", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["color, white, logo", "roof, trusses, beams"], "captions_pred_pc": ["for a black and white image of a shirt with dots", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a white t-shirt with a supreme logo", "a 3d model of a roof structure"], "question": "which entity has a roof", "label": 1}, {"captions": [" a white and yellow table with yellow legs and a metal roof structure.", " a house with a roof, wooden beams, and chimney."], "sample_ids": ["a71c43af3c944bf5b6d12375f7d54811", "be1376023c274bdda995d54f3694157f"], "properties": ["color, white, yellow, roof, metal", "roof, beams, chimney"], "captions_pred_pc": ["above a black and white drawing of a floor plan", "a black and white drawing of a bathroom with a shower"], "captions_pred_image": ["a 3d model of a white table with multiple cubes", "a 3d model of a house with a roof"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" a pair of fur-trimmed boxing gloves and a human heart, with a man's hand wearing a hat.", "a featuring a man wearing an orange and blue helmet, a white and blue watch, and a blue hat, accompanied by a robot with a head and an orange and blue clock."], "sample_ids": ["661c85f7cea14b7c81fb30b31d603cab", "169855ca6adc4f36814f17d275cb1e95"], "properties": ["boxing gloves, heart, hand", "hat, watch, clock"], 
"captions_pred_pc": ["a black and white image of a sponge in the shape of the letter 'v'", "in 15 words or less a silhouette of a man smoking a cigarette"], "captions_pred_image": ["a 3d model of the human heart", "a 3d model of a futuristic chair"], "question": "which entity has a watch?", "label": 1}, {"captions": [" a small white house with stairs and a wall-mounted shelf.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["10c4ba5b0db4490db9c00c21c94cb41f", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["house, color, white", "house, roof, wooden"], "captions_pred_pc": ["above a black and white drawing of a bench", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a small white building", "a black and white photograph of a birdhouse"], "question": "which house is made of wood", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", " of a white sheet of paper or box on a gray background."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "7b0c1e02d9b14f2fae4f1f7040661cc7"], "properties": ["base material is wood, color is red, message is welcome to northwich", "color, white, background, gray"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "above a black and white photograph of an object"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a paper airplane"], "question": "which object is white", "label": 1}, {"captions": [" of a small clay pot/bowl with a stone and brick variation, accompanied by a piece of bread.", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken window."], "sample_ids": ["936714802d2849bea3efb1eb5c83cee6", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["variation, bread, pot", "room, furniture, window"], "captions_pred_pc": ["above a black 
and white drawing of an object", "above a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a 3d model of an ancient pottery bowl", "a 3d image of a room with a person in it"], "question": "which entity has a kitchen?", "label": 1}, {"captions": ["white floor lamp with a white shade and base, resembling a white plastic stick or column, featuring a camera on top.", "white butterfly wall lamp with a white shade and bead, ."], "sample_ids": ["f1230cbf5653463bbda1bdf55543ba4d", "fd63194cf4f94d5e96bd2b85f542b936"], "properties": ["shade, base, height", "shade, white, bead"], "captions_pred_pc": ["for a black square on a white background", "a black and white image of a light bulb on a white background"], "captions_pred_image": ["a white plastic toothbrush on a gray background", "a wall lamp with a white shade and a butterfly design"], "question": "which lamp has a white shade", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["hat, sword, gun", "box, paper clip, lock"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "above a black and white image of a clock in the shape of a spiral"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d model of a stapler with a staple in it royalty free 3d model no."], "question": "which entity has a lock?", "label": 1}, {"captions": [" a clear glass table with metal legs and balls on top.", " of a beige and white round soap dish/small bowl."], "sample_ids": ["7c2bfa826f274377ac21f48d510848c3", "5414d75e47104589837f3df8b6de6d22"], "properties": ["glass, metal, balls", "beige, white, round"], 
"captions_pred_pc": ["a black and white image of a wine glass", "of a 3d model of a bracelet 3d model of a bracelet on a white background royalty free illustration 2019"], "captions_pred_image": ["a clear acrylic foosball table", "a white ceramic bowl sitting on top of a gray surface"], "question": "which object is round?", "label": 1}, {"captions": [" a wooden wall with a psychedelic circular pattern in red, green, and blue.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["5376daee484349378cb269a771ca5be0", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["color, pattern, wall", "house, pool, balcony"], "captions_pred_pc": ["of a cross made of black yarn on a white background", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a black square with a circular pattern on it royalty free 3d model preview no.1", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": [" a small village featuring houses, trees, and a winding road.", "a featuring a boat, table, chairs, umbrella, and solar panel."], "sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "0f0eb3a198d341d28f809b6d7634be8a"], "properties": ["houses, trees, road", "boat, table, chairs, umbrella, solar panel"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "a black and white illustration of a boat with an umbrella"], "captions_pred_image": ["a black and white photograph of a small town", "a 3d model of a boat, a table, chairs, and an umbrella"], "question": "which entity has a boat", "label": 1}, {"captions": [" of a pile of metal, torn and shredded paper, rocks, and a decayed animal.", " a house with roof trusses and wooden beams on a suspended ceiling."], "sample_ids": ["c117f1923cad4ecf9df61b6e3d633374", "3c2e3a3670b042069bd8290e2c357702"], "properties": ["a pile of metal, torn and shredded paper, rocks, and a decayed 
animal", "roof trusses, beams, suspended ceiling"], "captions_pred_pc": ["a black and white map of germany on a white background", "above a black and white drawing of a building"], "captions_pred_image": ["a 3d model of a crumpled piece of paper on a white background", "a 3d model of a house with a roof in progress royalty free 3d model preview no. 1"], "question": "which entity is a building?", "label": 1}, {"captions": ["a featuring a ship, large rock, stone slab, ruined building, stone floor, small stone structure, triangular object, and a piece of concrete.", " a small bedroom with wooden floors, walls, roof, and shelf."], "sample_ids": ["d83c5a2fd61c4e9f927d1d7b7c9e5aae", "e602ac60041f4b4f84c044161e478781"], "properties": ["ruined building, stone floor, small stone structure", "floor, wall, roof"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a mountain", "above a black and white image of a decorative metal bar"], "captions_pred_image": ["a 3d model of a piece of broken pottery", "a 3d model of a room with wooden walls and a rug on the floor"], "question": "which entity has a floor", "label": 1}, {"captions": [" of a white building with a small house and a desk with a laptop.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["9244a2d3a9e94c8398ef991f1661bb58", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["a, desk, laptop", "lizard, rock, stuffed animal"], "captions_pred_pc": ["a black and white image of a piece of furniture", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of an office desk on a white background", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": ["a 3d collection featuring a cash register, 
destroyed car, pos machine with credit card machine, broken cell phone, black and blue phone, atm machine, crocodile's head, and broken roof.", "a featuring a skeleton, torn paper, long stick, rock, and broken wood."], "sample_ids": ["d9681d1f6fad42ab8d498cba24339ca8", "46903bf029934b1989bc062dcb0a5531"], "properties": ["pos machine, credit card machine, cash register", "skeleton, torn, paper, long stick, rock, broken wood"], "captions_pred_pc": ["a black and white illustration of a glass bottle", "a close up of a black object on a white background"], "captions_pred_image": ["a vintage cash register sitting on top of a table", "a 3d sculpture of a person's hand in the air royalty-free 3d model preview"], "question": "which entity has a skeleton?", "label": 1}, {"captions": ["a featuring a phone booth, desk, chair, lamp, ladder, and two additional chairs.", " a house with a wooden-framed roof structure."], "sample_ids": ["7da804ad2b554c9a9915d775afb015d3", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["desk, chair, lamp", "roof, material, wood"], "captions_pred_pc": ["a black and white illustration of a city skyline", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d rendering of a desk and chair in a room", "a 3d model of a building with a roof"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": ["a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines.", " a multicolored cube representing a protein, featuring pink, yellow, red, and green hues."], "sample_ids": ["c516e491e5ee4313a4c06365ef13af3f", "ee7c3113f2754f9cbe8980b1b7cc4eff"], "properties": ["house, fence, playground", "color, shape, color"], "captions_pred_pc": ["above a black and white drawing of an industrial machine", "a black and white pattern of small dots on a white background a black and white pattern of small dots on a white background illustration"], "captions_pred_image": 
["a 3d model of a room with a lot of wires", "a 3d model of a piece of fabric"], "question": "which entity is a cube?", "label": 1}, {"captions": [" a small white building featuring a room with a door and a white shelf.", " a wooden table and bench with a deer head and branch on it."], "sample_ids": ["d7b78fa9a6b64f6095b881bc619b04fe", "857d5391612349f4ae6cd854a1ec96de"], "properties": ["room, door, shelf", "table, bench, deer"], "captions_pred_pc": ["above a black and white illustration of a person standing in front of a door", "a black and white drawing of a table and chairs"], "captions_pred_image": ["a 3d model of an empty room", "a black and white image of a bench and table with a deer's head on the table"], "question": "which entity has a deer head on it", "label": 1}, {"captions": ["a purple and yellow pixelated ethereum logo in pixel art style.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["bcf111e592d64b6490003680cae9407f", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["color, style, pixel", "camera, speaker, ceiling fan"], "captions_pred_pc": ["in 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 1", "for a black and white image of an object on a white background"], "captions_pred_image": ["an image of an electronic device with the letter 'z' on it", "a 3d model of a 
vintage camera royalty free 3d model preview no 3"], "question": "which entity has fewer speakers", "label": 1}, {"captions": ["a featuring a tree stump, mossy wood, leaves, and a rock with grass.", " a row of houses featuring roof structures with green roofs and tiled roof slats."], "sample_ids": ["2527cd763a1a43f9870eb65e44e79f7d", "aef9b23a78a7450286a961cc13448d00"], "properties": ["mossy, rock, grass", "roof, green, tiled"], "captions_pred_pc": ["a black and white image of a person on a skateboard", "of a black and white photo of a decorative metal wall hanging"], "captions_pred_image": ["a 3d model of a piece of wood on a white background", "a 3d model of a set of stainless steel shelves"], "question": "which entity has a green roof", "label": 1}, {"captions": ["a 3d white cube with windows resembling a building.", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["4a07a5293f024bb0a353954a056ef626", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["- material is white- color is white- texture is textured", "box, paper clip, lock"], "captions_pred_pc": ["a black and white image of a square made up of small dots", "above a black and white image of a clock in the shape of a spiral"], "captions_pred_image": ["a 3d model of a cube", "a 3d model of a stapler with a staple in it royalty free 3d model no."], "question": "which object is white", "label": 1}, {"captions": [" a large building featuring stairs, a clock tower, a balcony, and a roof.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["e7c78316f9cb4b8aad57a9c933f5278b", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["building, roof, balcony", "house, roof, wooden"], "captions_pred_pc": ["a black and white illustration of a group of dots in the shape of a square on a white background royalty free illustration", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a building 
with a clock tower", "a black and white photograph of a birdhouse"], "question": "which building has a wooden roof", "label": 1}, {"captions": ["three white plastic containers with lids, including a box, a cylinder, and a bottle.", " a small house with a tree, pool, and pond in a green and blue landscape."], "sample_ids": ["67e8933750254cd8afddbf4865ae9e39", "e22489ba182f45cb81f0a83f22abe9bd"], "properties": ["box, cylinder, bottle", "house, tree, pool"], "captions_pred_pc": ["a black and white dots pattern on a white background", "in 15 words or less a black and white image of a square with dots around it"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic container, and a plastic lid", "a 3d model of a house and a tree in a box royalty free 3d model preview no.3"], "question": "which entity has more trees", "label": 1}, {"captions": [" a multicolored metal building structure with a roof.", " a building with a purple, glass roof and a suspended ceiling featuring beams."], "sample_ids": ["22483891fd124baca3bbc6a6a49adc9c", "a54c746bb2644e3ea4e53ee65e32df64"], "properties": ["color, roof, structure", "roof, glass, purple, ceiling, beams"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "the letter 't' is made up of tiny white dots on a white background"], "captions_pred_image": ["a 3d model of a barn structure", "a 3d model of a building with a roof"], "question": "which building has a roof made of glass?", "label": 1}, {"captions": [" of a large rock or stone.", "a white teddy bear, cloud, skull, octopus, and bird in various positions on a gray background."], "sample_ids": ["0020acb8b03c4a1694a3a796904421d8", "dd5849aced0443b1b4b38d413f7e06c4"], "properties": ["boulder size, color, shape", "background, color, white"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a piece of paper", "a black and white image of a cat's head"], "captions_pred_image": ["a piece of marble on a white 
background royalty-free 3d model preview no. 2", "a 3d model of an animal skull in white on a gray background"], "question": "which entity is not a boulder?", "label": 0}, {"captions": ["a featuring a large flying ship, a mountain range with a central lake, and a small island resembling hawaii.", " a futuristic, small spaceship with two propellers, resembling a jet fighter."], "sample_ids": ["4d613d2057454e719bcae7f8cf05210a", "832a022cdcc74763b0571e04af4e592b"], "properties": ["a, island, resembles, hawaii", "resembles a jet fighter, futuristic, small"], "captions_pred_pc": ["above a black and white image of a person's hand holding a pencil", "a black and white illustration of a person's hand holding a pencil"], "captions_pred_image": ["a 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a", "a lego model of a futuristic airplane"], "question": "which entity is a spaceship?", "label": 1}, {"captions": ["a featuring a graffiti-covered train, bench, wall, and skateboard.", " of a small house featuring a flat, brown roof, table with two chairs and a stool, ceiling light, and a 
window."], "sample_ids": ["6de9fcac063d45df9424decdc215b379", "3a509431d96b43f8a7aebe2846f08b96"], "properties": ["graffiti, bench, wall", "roof, brown, flat"], "captions_pred_pc": ["for a black and white image of a boat", "a black and white drawing of a snowflake on a white background a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a graffiti-covered wall in a black and white photograph", "a 3d rendering of a table and stool"], "question": "which entity has a flat roof", "label": 1}, {"captions": ["a featuring a rock with a hole, a piece of metal, a knife, an arrow, and a person near a sand pit.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["b57936676e9d43abb635fa1217992287", "a17477b445b3443189dad22f768b888b"], "properties": ["a, hole, rock", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white image of a lace belt", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon'", "a 3d model of a small building with a balcony"], "question": "which entity has a roof?", "label": 1}, {"captions": ["a collection featuring a broken egg in a bowl, food wrapped in paper, torn and whole paper pieces, 
a white paper hat, a skull with a hat, a snowy iceberg, and a piece of cake.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["3d7bd392c9a14f4ab7c0aae2cf75b487", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["hat, food, bowl", "a, material, clay"], "captions_pred_pc": ["in 15 words or less a black and white image of dots on a white background", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of the earth with a hole cut out of it", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": ["a white statue of a woman holding a baby.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["7cf1c30a98fc4ae796e040c2ce18c03a", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["color, white, statue", "house, fence, playground"], "captions_pred_pc": ["a black and white drawing of a heart", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d printed figurine of a woman holding a cat", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" a large building with a roof and windows.", " a house with a wooden-framed roof structure."], "sample_ids": ["32d1fbd3ee91426882290305f70021e6", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["roof, windows, building", "roof, material, wood"], "captions_pred_pc": ["of a black and white photo of a diamond buckle", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no.2", "a 3d model of a building with a roof"], "question": "which building has a roof made of wood", "label": 1}, {"captions": ["two white spheres in a .", " of two rocks with ice elements."], "sample_ids": ["1c02212a35134545ab63ab180d629c31", 
"a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["two, spheres, white, in, a", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white illustration of two spheres", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a white ball on a gray background", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a white staircase with a black railing in a room.", "a 3d white cube with windows resembling a building."], "sample_ids": ["d05c1b6047e145a4906c467a9ebe7430", "4a07a5293f024bb0a353954a056ef626"], "properties": ["color, white, railing, black", "- material is white- color is white- texture is textured"], "captions_pred_pc": ["above a black and white image of a computer screen", "a black and white image of a square made up of small dots"], "captions_pred_image": ["a 3d rendering of a white staircase with a stainless steel handrail", "a 3d model of a cube"], "question": "which object is white", "label": 1}, {"captions": ["a pair of red-handled pliers and wire cutters, with black accents.", "red swivel chair with a white base ."], "sample_ids": ["3ffc31b7e4cd43f3a3df5722ef8b9fca", "7b78fb47a2684906bcc22ac6e848999a"], "properties": ["color, red, black", "color, red, base, white"], "captions_pred_pc": ["a black and white image of a toothbrush and a toothbrush", "a black and white image of a sphere made up of many small dots on a white background"], "captions_pred_image": ["a 3d rendering of a pair of pliers on a white background", "a 3d model of a modern chair"], "question": "which object has a white base?", "label": 1}, {"captions": [" of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom.", " a small white building with stairs and a white table."], "sample_ids": ["f178fb523ad7421aaa90a92ee736ee00", "e30374c614f54fdb90f35b96b071349d"], "properties": ["bedroom, 
bathroom, bed", "building, stairs, table"], "captions_pred_pc": ["a black and white drawing of a room with dots", "above a black and white drawing of a cat sitting on top of a letter 'e'"], "captions_pred_image": ["a 3d model of a small room with a bed, desk, and chair", "a 3d model of a building with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" of an ipad stand with a small speaker and blue-white light on a cylindrical base.", " a white rocking chair with a curved backrest."], "sample_ids": ["e0694fef8e414d69a6a89cdffd212c86", "ee0deb90abf943b6894cd5ded1331213"], "properties": ["base, speaker, light", "backrest, curved, yes"], "captions_pred_pc": ["above a black and white illustration of a clock", "a black and white illustration of a spiral pattern on a white background a black and white illustration of a spiral pattern on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a tablet stand on a pedestal royalty-free 3d model", "a 3d model of a white chair royalty free 3d model no. 
3"], "question": "which object has a curved backrest", "label": 1}, {"captions": [" of a construction site featuring a concrete wall, tools, equipment, wooden bench, fence, chair, and a graffiti-covered green wheelbarrow.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["766a075b4760433bafdbe88bee546796", "b896a0898efe4059a776193c02132129"], "properties": ["graffiti, wall, wheelbarrow", "- material is stone, metal, concrete"], "captions_pred_pc": ["for a black and white image of a bowl on a stand", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a wall with graffiti on it", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": ["white 3d triangular object", " a rusted metal barrel with a yellow and red warning sign and stripe on it."], "sample_ids": ["e03feccd20b0441586c0d12621e8139f", "5a49ad82ef7a4d33badea2261720f518"], "properties": ["color is white, shape is triangular, material is plastic", "rusty, warning, metal"], "captions_pred_pc": ["a black and white image of a tablecloth with dots", "a black and white drawing of dots on a white background a black and white drawing of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d white pyramid on a gray background", "a black and white photograph of a barrel"], "question": "which object is made of metal", "label": 1}, {"captions": ["a white 3d object featuring black and white patterns, resembling a combination of a dish, smoke detector, cake, bowl, alarm clock, and ceiling light.", " a small purple plastic chair with four legs."], "sample_ids": ["6a8cc820f00a4cfc954d56e2b1f6206a", "1bb40ec897884b788dc0a2dac090f347"], "properties": ["- material is plastic- color is white- shape is cylinder", "color is purple, material is plastic, number of legs is four"], "captions_pred_pc": ["in 15 words or less a black 
and white drawing of a plate", "of a black and white image of a baby carrier"], "captions_pred_image": ["a cake with a black and white design on the top of the cake", "a 3d model of a white plastic chair"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a small house on an island with trees, shrubs, a pool, and a lake.", " a small wooden cabinet with a drawer, doubling as a bedside table."], "sample_ids": ["c8331489fca44685bedfa1bdadf6ccb3", "3755f3c19ae549c4bf708462db1b2581"], "properties": ["house, lake, pool", "Cabinet, drawer, wood"], "captions_pred_pc": ["a black and white image of a pattern on a piece of paper", "a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d model of a large building", "a 3d model of a wooden box with a lid"], "question": "which entity is made of wood", "label": 1}, {"captions": [" of a multi-colored spooling machine with wires and circuit board, featuring a lamp and shelf with toys.", " a spiral staircase with a railing in a small building."], "sample_ids": ["0e3f5cc16806492b948d41a748819ce3", "28cae056856c4a8ba9d1a6af5355f831"], "properties": ["color, shape, material", "staircase, railing, building"], "captions_pred_pc": ["a black and white image of a decorative tile", "a black and white photograph of a light switch"], "captions_pred_image": ["a 3d rendering of an electronic device on a white surface", "a 3d model of a staircase in a white room"], "question": "which staircase has a railing", "label": 1}, {"captions": [" of a stone wall with writing, metal bars, and a broken piece featuring a metal hook, resembling an egyptian cuneiform tablet.", " a futuristic, small spaceship with two propellers, resembling a jet fighter."], "sample_ids": ["9951e345202c44c19ab1eec11934bc52", "832a022cdcc74763b0571e04af4e592b"], "properties": ["image is a stone wall with writing, metal bars, and a broken piece featuring a metal hook, resembling an egyptian cuneiform tablet", 
"resembles a jet fighter, futuristic, small"], "captions_pred_pc": ["above a black and white drawing of a pair of binoculars", "a black and white illustration of a person's hand holding a pencil"], "captions_pred_image": ["a 3d rendering of a stone wall with a metal bracket holding it in place", "a lego model of a futuristic airplane"], "question": "which entity is a spaceship?", "label": 1}, {"captions": [" a small house with a red roof.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["085db9059b744673b5623b5338e02196", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["roof, red, house", "house, fence, playground"], "captions_pred_pc": ["a black and white dotted square on a white background", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of a small shed in the snow", "a 3d model of a room with a lot of wires"], "question": "which house has a fence", "label": 1}, {"captions": [" of a large black mat with square grid design.", " of a white supreme logo t-shirt, low poly design."], "sample_ids": ["72aac2e9ccd7482eb88e5e4bc204fbf3", "bea8441c08d94366b96b53775391d8e6"], "properties": ["size, color, design", "color, white, logo"], "captions_pred_pc": ["a black and white pattern on a white background", "for a black and white image of a shirt with dots"], "captions_pred_image": ["a 3d rendering of a black rubber mat on a gray surface", "a 3d model of a white t-shirt with a supreme logo"], "question": "which is a t-shirt", "label": 1}, {"captions": [" a large, multi-floor building with columns, shelves, conveyor table, and a ceiling structure featuring pipes.", " a building featuring yellow columns, a yellow roof, and a wooden structure."], "sample_ids": ["6d773d2b0ed9437ea2b9b352bd8a5c25", "0ce6a4102f4f40e2a0084938b0a93941"], "properties": ["building, floor, columns", "structure, columns, roof"], "captions_pred_pc": ["in one 
line a black and white drawing of a dotted pattern on a white background", "a black and white drawing of a window"], "captions_pred_image": ["a 3d rendering of a large white table with multiple shelves", "a 3d model of a building with multiple levels"], "question": "which building has a wooden structure?", "label": 1}, {"captions": [" a house featuring furniture, people, a staircase, and torn-apart sections.", " of a barrel and a cube together."], "sample_ids": ["85335d3349894b5884dbf6f3e7d68fcc", "f00b7661daf544b68cddf85d7d0308c7"], "properties": ["furniture, staircase, torn-apart", "a, barrel, cube"], "captions_pred_pc": ["a black and white image of a pixelated cityscape on a white background royalty free illustration", "a black and white illustration of a 3d cube and a 3d sphere"], "captions_pred_image": ["a 3d model of a damaged room with broken furniture and debris on the floor", "a 3d model of a barrel and a box next to each other royalty free 3d model preview no.3"], "question": "which object is not a barrel?", "label": 0}, {"captions": [" a red sphere, resembling a ball, balloon, or lipstick.", "a 3d white object resembling a knife, sword, and airplane."], "sample_ids": ["f09589903f6146dfb623a6c1a07c5bfa", "d88df1cb10da467bb6f77af6aeaa8f86"], "properties": ["red, sphere, resembles", "shape is cylinder, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of an object on a white background", "of a spike on a white background"], "captions_pred_image": ["a black balloon floating in the air on a gray background", "a 3d model of a paper airplane"], "question": "which object is more likely to be made of plastic", "label": 1}, {"captions": [" of a slice of bread and knife on a cutting board.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["0d5f5baa97754547ad517b694ea8edc7", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["bread, knife, board", "island, mountain, grass"], "captions_pred_pc": 
["above a black and white illustration depicting a galaxy with a black hole in the center", "a black and white map of the island of malta"], "captions_pred_image": ["a loaf of bread and a knife on a cutting board", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": ["a 3d white cube.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["94fbd38f3f9c454eafc09bf4d7544146", "bf18bfd89efd43389781050230467d58"], "properties": ["color, shape, size", "Lights, number, five"], "captions_pred_pc": ["of a black lace belt on a white background", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white object on a gray background", "a white chandelier with five white shades"], "question": "which entity has more lights", "label": 1}, {"captions": [" featuring a chair, table, and refrigerator.", " a house with a wooden-framed roof structure."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["chair, table, refrigerator", "roof, material, wood"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "a 3d model of a building with a roof"], "question": "which entity is made of wood", "label": 1}, {"captions": [" of a sword and knife.", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], "sample_ids": ["8b567403ba614523a298f1c5b2009f92", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["a, sword, knife", "building, plane, room"], "captions_pred_pc": ["a black silhouette of a knife on a white background", "a black and white image of a rectangular frame with dots on a white background a black and white image of a 
rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a samurai sword royalty free 3d model preview no 2", "a 3d model of a box with a lot of items inside"], "question": "which entity has a room?", "label": 1}, {"captions": [" of a mushroom cloud with white tree and coral plant elements, featuring blue and green leaves.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["bca5233d878e4cf09b5bc2bb6f3915b0", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["color, shape, texture", "roof, trusses, beams"], "captions_pred_pc": ["a black and white image of a square with dots on it", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a tree made out of white flowers", "a 3d model of a roof structure"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" a small white box with a shelf and a hole in it.", " a small purple plastic chair with holes."], "sample_ids": ["d023ae78bc5a436eaba13c5ecdd45c56", "fe2bf0f8f5c64dd6bac3e2da0d1b89d0"], "properties": ["a, hole, shelf", "color, plastic, purple"], "captions_pred_pc": ["a black and white drawing of a dotted square on a white background", "this image may contain clothing apparel accessory purse bag and handbag"], "captions_pred_image": ["a 3d model of a white box on a gray background", "3d model of a chair royalty free 3d model preview no 3"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a woman with long wings, legs, hair, and spikes, accompanied by a spider with long legs.", " a small house on an island with trees, shrubs, a pool, and a lake."], "sample_ids": ["68cf560d0c424ec6a3c58e1b9967508d", "c8331489fca44685bedfa1bdadf6ccb3"], "properties": ["hair, legs, wings", "house, lake, pool"], "captions_pred_pc": ["of a white and black bird flying in front of a white background", "a black and white image of a pattern on 
a piece of paper"], "captions_pred_image": ["a 3d model of a woman with a large spider-like creature on her back", "a 3d model of a large building"], "question": "which entity has a pool", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", " a church building at night with a projection, snow, and a window, featuring a hand with a skeleton on it."], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "4389909667db472ea9a57a70eb940fa4"], "properties": ["color, door, window", "building, window, skeleton"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "a black and white photo of a person's reflection in a mirror"], "question": "which building has a window with a skeleton on it?", "label": 1}, {"captions": [" a large house/building structure with a roof.", " a building with wooden and steel structures, featuring stairs, railings, and a ceiling with numerous pipes."], "sample_ids": ["82859e4c6d4e4bbea94b6252bef1d398", "26c47880756b4876b4f263373c3c5303"], "properties": ["roof, structure, house", "building, material, steel"], "captions_pred_pc": ["a black and white photograph of a metal sculpture", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a large white structure", "a 3d model of a large structure with multiple levels"], "question": "which building is made of steel", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", " of a white spiral, earbuds, ceiling light, and silver ring with black and white scissors and design."], "sample_ids": ["7907916e8f524df5b16eb566497db83e", "c69f60b389124ad9b4f81c64ec332054"], "properties": ["color, roof, tray", "earbuds, light, ring"], "captions_pred_pc": ["a black and white image of a metal object", "a black and white drawing of a 
needle and thread"], "captions_pred_image": ["a 3d model of the roof of a building", "a black and white illustration of a pair of sunglasses and a pair of scissors next to each other on a white background"], "question": "which entity has a ceiling light?", "label": 1}, {"captions": [" a yellow boat/submarine with a red arrow and light.", " a toy motorcycle, car, and robot on an orange platform."], "sample_ids": ["9b9c31fe4e6b4004a4cb34176f329c04", "7407a108e0354925b83b750339bc03df"], "properties": ["color, shape, light", "platform, color, orange"], "captions_pred_pc": ["a black and white image of a letter 'l' on a white background", "a black and white illustration of a bicycle"], "captions_pred_image": ["a 3d rendering of a table with an object on it", "a 3d model of a motorcycle on a pedestal"], "question": "which object is a toy?", "label": 1}, {"captions": [" a large building featuring stairs, a clock tower, a balcony, and a roof.", " a small wooden house with a green roof."], "sample_ids": ["e7c78316f9cb4b8aad57a9c933f5278b", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["building, roof, balcony", "roof, color, green"], "captions_pred_pc": ["a black and white illustration of a group of dots in the shape of a square on a white background royalty free illustration", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a building with a clock tower", "a 3d model of a house with a ladder"], "question": "which building has a green roof", "label": 1}, {"captions": [" a building featuring graffiti, chinese writing, a door, a broken window, and an interior bathroom with a sink.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["1ee3df6f94ea4c329a9c5245634e34d5", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["graffiti, chinese writing, door", "house, roof, wooden"], "captions_pred_pc": ["a black and white illustration of a bridge with dots", "for a black and white image 
of an object on a white background"], "captions_pred_image": ["a black and white image of a bathroom with a sink and a toilet", "a black and white photograph of a birdhouse"], "question": "which building has a roof", "label": 1}, {"captions": [" a yellow monster-sphere with teeth.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["fc5f906bab2c4c36a406ebdc15f58541", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["color, shape, texture", "roof, trusses, beams"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background dandelion illustration on a white background illustration", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a 3d model of a white ball with teeth", "a 3d model of the roof of a building"], "question": "which entity has a roof", "label": 1}, {"captions": [" a white and yellow metal shelving unit with a steel structure, yellow legs, and suspended ceiling system.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["c1a7d264b34841409009b3d5d39d5b99", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["Steel, Color, Yellow", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black and white illustration of a building", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d model of a table with multiple tables and chairs", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a vibrant city skyline featuring various colored buildings, trees, and skyscrapers.", " a chair with a yellow seat."], "sample_ids": ["1a1fb9b0d83845f6b1238fb45e0defff", "a4de5f2055154465968f33d6289c64e6"], "properties": ["color, skyline, buildings", "color, yellow, seat"], 
"captions_pred_pc": ["a black and white illustration of a city skyline", "above a black and white drawing of a square with small dots"], "captions_pred_image": ["a black and white 3d model of a city skyline", "a 3d model of a chair on a white background royalty free 3d model no."], "question": "which entity has a yellow seat", "label": 1}, {"captions": ["a piece of rock with a blue stick.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["8a8b16deb8794d4b812a6b36cc6a3cd9", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["color, rock, stick", "house, fence, playground"], "captions_pred_pc": ["a black and white illustration of dots on a white background a black and white illustration of dots on a white background stock illustration", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a black and white photograph of a rock formation", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" of a bearded man wearing a green shirt and a hat.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["1e4e5e8133ae48c797facaec724c13a5", "bded33af34104b9686b845dfd18309a9"], "properties": ["hat, shirt, bearded", "table, staircase, light"], "captions_pred_pc": ["of a black and white bracelet on a white background", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d model of a man with a beard", "a 3d model of a small table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a small house with a tree and a rock.", " a small white building with stairs and shelves."], "sample_ids": ["9dc392a7f6e444e5bfb720684d6f864a", "9e1f64d4fd514059be934077717536dc"], "properties": ["house, tree, rock", "building, stairs, shelves"], "captions_pred_pc": ["in 15 words or less the 
image depicts a white square on a white background with black dots all over the square stock illustration", "a black and white image of a person standing in front of a white background"], "captions_pred_image": ["a 3d model of a small house with a tree in front of it", "a white 3d model of a building with stairs"], "question": "which building has stairs?", "label": 1}, {"captions": [" a pair of fur-trimmed boxing gloves and a human heart, with a man's hand wearing a hat.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["661c85f7cea14b7c81fb30b31d603cab", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["boxing gloves, heart, hand", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white image of a sponge in the shape of the letter 'v'", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of the human heart", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": ["a featuring a plane, a small plane, a bird, and a dragonfly all flying in the air.", " a long row of steel shelves in a warehouse, featuring a suspended scaffolding system."], "sample_ids": ["f39783d05dec49e49482c407d656e0f7", "578fe7a7bd754b889be33aea99cf5050"], "properties": ["a, plane, small plane", "a, material, steel"], "captions_pred_pc": ["of a black and white photo of an airplane propeller", "above a black and white image of a rack with multiple shelves"], "captions_pred_image": ["a 3d printed model of a fighter plane in the air", "a 3d model of a large metal structure"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a plague mask with a rusty, horned, wooden helmet and a crow's head design.", "a victor calculator with a black plastic cover and wall-mounted design."], "sample_ids": ["2b0896f810074399a5ae7d6dbab8c330", "88ffa01f4fc34a8cb3e2a659e9e26125"], "properties": ["- 
material is wood, rusty, horned", "cover, black, plastic"], "captions_pred_pc": ["in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration", "of a black and white image of a skateboard"], "captions_pred_image": ["3d model of a plague doctor's mask", "a victor calculator on a white background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", "a featuring a phone booth, desk, chair, lamp, ladder, and two additional chairs."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "7da804ad2b554c9a9915d775afb015d3"], "properties": ["ceiling, light, desks", "desk, chair, lamp"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "a black and white illustration of a city skyline"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d rendering of a desk and chair in a room"], "question": "which entity has a desk", "label": 1}, {"captions": [" a small white house with stairs and a spiral staircase, featuring a white table and ceiling light.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["e9e1cc7fae22458197a61f43a9c355f4", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["house, staircase, table", "house, pool, balcony"], "captions_pred_pc": ["above a black and white photograph of a dog in a frame", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a small house with a spiral staircase", "a 3d model of a modern house"], "question": "which house has a pool", "label": 1}, {"captions": ["low poly of a blue ice cube, resembling a pear-shaped sphere with a flame-like structure.", "low poly of a blue ice cube, resembling a pear-shaped sphere with a flame-like structure."], "sample_ids": 
["6813afb531d041e48532088d01b00db9", "6813afb531d041e48532088d01b00db9"], "properties": ["- color is blue- shape is pear-like- structure is flame-like", "- color is blue- shape is pear-like- structure is flame-like"], "captions_pred_pc": ["a black and white drawing of a starfish on a white background starfish on a white background royalty free illustration", "a black and white drawing of a starfish on a white background starfish on a white background royalty free illustration"], "captions_pred_image": ["a low poly 3d model of a rock", "a low poly 3d model of a rock"], "question": "which entity is a sphere?", "label": 0}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["color, material, structure", "a room, a cake, a table"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d rendering of a white room with various items in it"], "question": "which entity has a room?", "label": 0}, {"captions": [" of a pile of metal, torn and shredded paper, rocks, and a decayed animal.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["c117f1923cad4ecf9df61b6e3d633374", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["a pile of metal, torn and shredded paper, rocks, and a decayed animal", "a, material, clay"], "captions_pred_pc": ["a black and white map of germany on a white background", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a crumpled 
piece of paper on a white background", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" of a cactus bunny planter with green leaves in a white bowl.", "\"multiple white cubes arranged in a row on a gray background.\""], "sample_ids": ["03614cc7ab6943e5857f17c5814da146", "17c8222d4ce04e518117078e7de6aaed"], "properties": ["color, bowl, green", "color, background, white"], "captions_pred_pc": ["a black and white illustration of a dandelion in the shape of a dandelion on a white background illustration of a black and white illustration of a dandelion in the shape of a dandelion on a white background royalty free illustration", "a black and white image of a box with the words 'box 2' and 'thoughtful'"], "captions_pred_image": ["3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3d model of a cactus in a bowl 3", "an image of a white background with a few small cubes on it"], "question": "which object has a white background?", "label": 0}, {"captions": [" a wooden object, including a board, piece of wood, box, and shelf.", " of a white nut, bowl, ring, and light fixture."], "sample_ids": ["c986212445a1466ca7be7b5ac6bea729", "02bdf80f614f44c4ad20dd6c3c1316c4"], "properties": ["wood, board, shelf", "nut, bowl, ring"], "captions_pred_pc": ["a black and white drawing of snowflakes on a white background", "a black and white hexagonal pattern on a white background"], "captions_pred_image": ["a 3d rendering of 
a piece of marble", "a 3d rendering of a white object on a gray background"], "question": "which object has more rings", "label": 1}, {"captions": [" a house with a pink roof, truss, and a square white ceiling lamp.", " a small house with a pink and blue roof and a white ceiling featuring wooden beams."], "sample_ids": ["91b2e9e4660946f5b4808a18b5323b69", "443554d4d7044c66aa8cbff63c737589"], "properties": ["roof, truss, lamp", "roof, color, pink, blue"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white square with dots all over it"], "captions_pred_image": ["a 3d model of a house with a metal roof", "a 3d model of a roof with a triangular design royalty-free 3d model preview no.2"], "question": "which roof is pink", "label": 1}, {"captions": [" a blue and green sneaker with a yellow accent and blue sole.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["a2b393941a564397afb75f9ae8e70276", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["color, blue, green, yellow, blue", "roof, color, yellow"], "captions_pred_pc": ["a pair of sneakers on a white background", "a black and white drawing of a room"], "captions_pred_image": ["a 3d model of a pair of sneakers royalty free 3d model preview no 2", "a 3d model of a table and chairs on a white background"], "question": "which entity has a yellow roof", "label": 1}, {"captions": ["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["db19e46828c94b1a8b9a6ca9f673c604", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["a, roof, soup", "roof, trusses, beams"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d 
model of a bowl and chopsticks on a sheet of paper", "a 3d model of a roof structure"], "question": "which roof is made of wood", "label": 1}, {"captions": [" orange and yellow fish, candy, and a pair of shoes.", " a snowy city with buildings and a plane flying overhead."], "sample_ids": ["0fa2a605d7e940e5946f63c0f74234f3", "cc63ceb2b5e84872a1a1f6423de419e2"], "properties": ["color, shape, and material", "building, plane, city"], "captions_pred_pc": ["a black and white photograph of a group of plastic cups", "a black and white photo of an airplane on a white background"], "captions_pred_image": ["a 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes", "a 3d model of a city in black and white"], "question": "which entity is a city?", "label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["color, light, jewels", "water, boat, 
rock"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background royalty free illustration", "a black and white illustration of a surfboard"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a 3d image of an animal laying on the ground"], "question": "which entity has a boat?", "label": 1}, {"captions": [" a small white gravestone on a grassy field.", " a clay pot with holes in it."], "sample_ids": ["b43779ae94f74bffba4f29863518f506", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["location, field, gravestone", "hole, material, clay"], "captions_pred_pc": ["a black and white illustration of a whale", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": ["a 3d white cube featuring a hole, wheels, and a diamond.", " of a yellow metal locker with legs, wheels, and metal 
brackets."], "sample_ids": ["e44009d33258425e8efedfbc6823bf70", "e3fde8fe782c41f0b141c9f1b8e13aa5"], "properties": ["- color is white- shape is cube- material is plastic", "metal, legs, wheels"], "captions_pred_pc": ["for a black and white image of a toothbrush in the shape of a toothbrush", "a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white"], "captions_pred_image": ["a 3d model of a white cube", "a 3d model of an old metal locker"], "question": "which object is made of metal", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", "a featuring a fireplace with a broom, shovel, and other items, a man and a dog, a table with a bucket, and a vase with a pipe."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "fdcbb46224a44faca5c3fb20264eb2e7"], 
"properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "broom, shovel, fireplace"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "a close-up view of a white plastic bag with a small hole in it"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d", "a 3d model of an outdoor fireplace"], "question": "which entity has a fireplace?", "label": 1}, {"captions": [" of a green alien creature with long legs and a long tail.", " of a hammer with a long metal handle."], "sample_ids": ["ad7c9475a4e24462bf6b5c24bcde317a", "30f4b6bcbbb44f568cab4fd439d05145"], "properties": ["color, leg, tail", "handle, metal, long"], "captions_pred_pc": ["a black and white image of a sword on a white background", "a black and white toothbrush on a white background"], "captions_pred_image": ["a 3d model of an alien creature with a long tail", "a hammer with a wooden handle and metal head"], "question": "which object has a long metal handle", "label": 1}, {"captions": [" a large orange keg with a white lid.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["cf24eea70b4f4067b36583924a82cc35", "97e000ff41094665afd94ea565da8b13"], "properties": ["color, lid, orange", "roof, material, wood"], "captions_pred_pc": ["a black and white circular pattern of dots on a white background a black and white circular pattern of dots on a white background 
royalty free illustration", "a black and white drawing of a floor plan"], "captions_pred_image": ["a gray keg with a white lid sits on top of a gray surface", "a 3d model of the roof of a building"], "question": "which object is made of wood", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " tall grass, plants, rocks, and a tree."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "eefed882ed5f4711bc5a76332d9712f3"], "properties": ["a box, a cup, a bottle, a jar", "grass, plants, rocks"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d model of a group of trees"], "question": "which entity has more grass", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["roof truss, insulation, suspended ceiling", "mountainous, landmass, state"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a piece of paper"], "question": "which entity is not a state?", "label": 1}, {"captions": [" a modern office building with a green door, green roof, windows, and blue lights.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], "sample_ids": ["d6087023095446fbadef1721478373b2", "d81d13362ae04371bb2cba46e4939665"], "properties": ["door, roof, window", "hat, bow, arrow"], "captions_pred_pc": ["a black and white drawing of a toilet brush", "above a black and white photo of an ice sculpture"], 
"captions_pred_image": ["a 3d model of an apartment building", "a sculpture of an african man sitting on a pedestal"], "question": "which entity has a hat?", "label": 1}, {"captions": ["a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines.", " a large steel and metal structure with a pool and scaffolding system."], "sample_ids": ["c516e491e5ee4313a4c06365ef13af3f", "5850d5c7223447db816081d50292fec0"], "properties": ["house, fence, playground", "structure, material, pool"], "captions_pred_pc": ["above a black and white drawing of an industrial machine", "a black and white drawing of a bridge with chains"], "captions_pred_image": ["a 3d model of a room with a lot of wires", "a 3d model of a large concrete structure"], "question": "which structure has a pool", "label": 1}, {"captions": [" a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["a452d5381dad4dc09f5ebe10635ae5fe", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["house, roof, green", "house, fence, playground"], "captions_pred_pc": ["above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a 
black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of a building with a black roof", "a 3d model of a room with a lot of wires"], "question": "which house has a fence", "label": 1}, {"captions": ["a 3d object featuring a fish with a long nose and open mouth, a cat head with red, yellow, and green stripes, a frog with a long snout, a white horse with green and yellow stripes, and a wolf mask.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["276699bb0f974c47b4e2954cfcd1651c", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["a, color, white", "house, fence, playground"], "captions_pred_pc": ["a black and white image of a skull in the shape of a butterfly", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of an animal with a long nose", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" a small off-road toy vehicle with wheels.", " a small white building with stairs and shelves."], "sample_ids": ["7ef2589539b84054b2185633112825cc", "9e1f64d4fd514059be934077717536dc"], "properties": ["wheel, toy, vehicle", "building, stairs, shelves"], "captions_pred_pc": ["of a black and white photo of a cross on a white background", "a black and white image of a person standing in front of a white background"], "captions_pred_image": ["a 3d model of a dune buggy royalty free 3d model preview no 3", "a white 3d model of a building with stairs"], 
"question": "which entity is a building?", "label": 1}, {"captions": [" a small house with a red roof.", " a small village featuring houses, trees, and a winding road."], "sample_ids": ["085db9059b744673b5623b5338e02196", "7acf46c0265d4e39b97ac084852abde8"], "properties": ["roof, red, house", "houses, trees, road"], "captions_pred_pc": ["a black and white dotted square on a white background", "in 15 words or less a black and white photo of a mountain landscape"], "captions_pred_image": ["a 3d model of a small shed in the snow", "a black and white photograph of a small town"], "question": "which entity has a road?", "label": 1}, {"captions": ["a small 3d purple teapot and elephant.", " of a white and brown sea shell with a hole and small pearls on it, resembling a sea urchin."], "sample_ids": ["4a27592dc8164f709b44446ee11832c0", "411c164757fc4de68dfecb35fa858223"], "properties": ["color, shape, material", "resembles, sea urchin, shell"], "captions_pred_pc": ["a black and white 3d illustration of an elephant's head on a white background 3d illustration of an elephant's head on a white background royalty free illustration", "in 15 words or less a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of 
a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a"], "captions_pred_image": ["a white ceramic teapot on a gray background", "a 3d model of a sea urchin"], "question": "which entity is made of a shell", "label": 1}, {"captions": [" of a wooden staircase with marble floor and wooden railings in a house.", " a house with a yard, trees, bushes, and surrounding buildings."], "sample_ids": ["ee70964ce5e841bd87381cff40d59b88", "7f8942ef51dd4246993a587a12df168c"], "properties": ["floor, staircase, railings", "house, yard, surrounding buildings"], "captions_pred_pc": ["a black and white drawing of a light switch", "a black and white image of a truck on a white background"], "captions_pred_image": ["a 3d model of a staircase on a marble surface", "a 3d model of a house in the middle of a field"], "question": "which house has a yard", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "3cd410c4359a4cef98702963a2b9802b"], "properties": ["yellow, table, roof", "roof, trusses, ladder"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of the roof of a building"], "question": "which entity has a roof with wooden trusses and a ladder?", "label": 1}, {"captions": [" a small white house with stairs and a wall-mounted shelf.", " a small house with stairs and a roof."], "sample_ids": ["10c4ba5b0db4490db9c00c21c94cb41f", "e9305c80010f4e3b9de9789f01a9bee5"], "properties": 
["house, color, white", "roof, stairs, house"], "captions_pred_pc": ["above a black and white drawing of a bench", "above a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d model of a small white building", "a 3d rendering of a podium on a wooden floor"], "question": "which house has a roof", "label": 1}, {"captions": [" a truck with luggage on top and various parts.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["a6d5c3b54ebd4d17ba73f86d1527793c", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["Parts, Luggage, Truck", "house, pool, balcony"], "captions_pred_pc": ["for a black and white image of a person holding a paintbrush", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a small vehicle with a trailer attached to it royalty free 3d model preview no.2", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", " a green circuit board."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "1b2cc7abaf5e4e7e9d4652163d051b16"], "properties": ["color, red, blue, structure", "color, green, circuit board"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "of the item in the image"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d model of an electronic component"], "question": "which entity is a circuit board?", "label": 1}, {"captions": [" a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it.", " tall grass, plants, rocks, and a tree."], "sample_ids": ["93fb4197f0014f7582029af24c7ed9de", "eefed882ed5f4711bc5a76332d9712f3"], "properties": ["throne, stairs, tree", "grass, plants, rocks"], "captions_pred_pc": ["in 15 words or less a black and white image of a toilet paper roll on a 
white background royalty free illustration", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a throne with a tree on it", "a 3d model of a group of trees"], "question": "which entity has more grass", "label": 1}, {"captions": [" a small white toy cannon.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["a414258c6bf149708404913a89ab8dbc", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["color, white, toy", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["for 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer on a white background 3d illustration of a hair dryer", "a black and white image of a circular object on a white background"], "captions_pred_image": ["a 3d model of a cannon on a white background", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": [" a child's room in a small house with windows.", " a small wooden house."], "sample_ids": ["88847a6445044bcbab9611e6028a19b9", 
"4cb4dba1237443eb8dc299530fa12521"], "properties": ["room, house, windows", "house, material, wood"], "captions_pred_pc": ["for a black and white drawing of a snowflake", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots"], "captions_pred_image": ["a 3d model of a child's room with toys and furniture in it royalty free 3d model preview no.1", "a 3d model of a small cottage"], "question": "which house is made of wood", "label": 1}, {"captions": [" featuring a chair, table, and refrigerator.", " a wooden billiard table with legs."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "b20ad62516fa467ba6e8de063998e8e4"], "properties": ["chair, table, refrigerator", "legs, material, wood"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "a black and white drawing of a rectangular shaped object"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "a 
black and white image of a pool table"], "question": "which object has legs", "label": 1}, {"captions": [" of a slice of bread and knife on a cutting board.", " of a white sculpture, resembling a horse and paper plane, on a gray background."], "sample_ids": ["0d5f5baa97754547ad517b694ea8edc7", "179b4438edfc4a43a27a83784f38ff4b"], "properties": ["bread, knife, board", "color, background, white"], "captions_pred_pc": ["above a black and white illustration depicting a galaxy with a black hole in the center", "above a black and white image of a sculpture in the shape of a bird"], "captions_pred_image": ["a loaf of bread and a knife on a cutting board", "a 3d printed sculpture of a horse's head on a gray background"], "question": "which entity is not white?", "label": 0}, {"captions": [" featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot.", "a small blue couch with gold trim and a key attached, a blue and yellow chair, and various s of bags containing blue and green objects, a blue stone, and a frog on a cart."], "sample_ids": ["34ebe81ae93841ca829efd15aee4d8c1", "0e20c8a7599d43128d078d9a0973b5f9"], "properties": ["moss, mushroom, grass", "blue, yellow, frog"], "captions_pred_pc": ["for a black and white illustration of a cloud on a white background", "of a black and white photo of a chaise lounge"], "captions_pred_image": ["a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous 
tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor", "a 3d model of a piece of furniture"], "question": "which entity has a frog?", "label": 1}, {"captions": [" a small village featuring houses, trees, and a winding road.", " a wooden table and bench with a deer head and branch on it."], "sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "857d5391612349f4ae6cd854a1ec96de"], "properties": ["houses, trees, road", "table, bench, deer"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "a black and white drawing of a table and chairs"], "captions_pred_image": ["a black and white photograph of a small town", "a black and white image of a bench and table with a deer's head on the table"], "question": "which entity has a deer head on it", "label": 1}, {"captions": [" a small house with a tree and a rock.", " a small building with windows and a roof."], "sample_ids": ["9dc392a7f6e444e5bfb720684d6f864a", "0ef2cac27e364c0687afae7ab5040cc3"], "properties": ["house, tree, rock", "roof, windows, building"], "captions_pred_pc": ["in 15 words or less the image depicts a white square on a white background with black dots all over the square stock illustration", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a 3d model of a small house with a tree in front of it", "a 3d model of an apartment building royalty free 3d model preview no 3"], "question": "which entity has a roof", "label": 1}, {"captions": [" a loaf of bread and a piece of cake.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["e9a56b5d4f344d5699ddb629f4b94ef8", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["bread, cake, loaf", "a room, a cake, a 
table"], "captions_pred_pc": ["a black and white drawing of a piece of fabric with dots", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a 3d model of a loaf of bread royalty free 3d model preview no 3", "a 3d rendering of a white room with various items in it"], "question": "which entity has a table?", "label": 1}, {"captions": [" a small house with a road in front of it.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["9ff45258feba4c68bb279efeb829538f", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["house, front, road", "house, roof, blue"], "captions_pred_pc": ["above a black and white drawing of a building", "a black and white illustration of a coffee mug on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small house on a road", "a 3d model of a small house and barn"], "question": "which house has a blue roof", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " a wooden shed with a gray roof."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["bed, desk, window", "roof, color, gray"], "captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d model of a shed with a gray roof"], "question": "which entity has a roof that is the color of gray", "label": 1}, {"captions": [" a room featuring a wall with a painting, a hole, and a door.", " a wooden shed with a gray roof."], "sample_ids": ["1d1328346a464d2482463d6d5288e934", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["painting, door, wall", "roof, color, gray"], 
"captions_pred_pc": ["in one hundred words or less an illustration of an igloo on a white background stock illustration", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white photograph of a torn piece of paper in the shape of a bird", "a 3d model of a shed with a gray roof"], "question": "which entity has a roof that is the color gray", "label": 1}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue arrow, exosphere label, england label, blue and purple stripes, and a blue flag.", " a wooden roof structure with a pink roof."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["color, temperature, england", "roof, color, pink"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of the earth with a rainbow in the sky", "a 3d model of the roof of a building"], "question": "which entity has a roof that is the color pink?", "label": 1}, {"captions": ["a white of a spaceship and building.", " a wooden roof structure with a pink roof."], "sample_ids": ["bf7d4277c9184d35abdec85bd5e25956", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["image, building, spaceship", "roof, color, pink"], "captions_pred_pc": ["a black and white drawing of a tree", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a 3d model of the roof of a building"], "question": "which building has a pink roof", "label": 1}, {"captions": [" a small house with a red roof.", " a small house with a pond and situated on a rock."], "sample_ids": ["085db9059b744673b5623b5338e02196", "92859eb82a344134806b37cc209927c6"], 
"properties": ["roof, red, house", "house, rock, pond"], "captions_pred_pc": ["a black and white dotted square on a white background", "in 15 words or less a black and white drawing of a toaster"], "captions_pred_image": ["a 3d model of a small shed in the snow", "a 3d model of a house in the middle of a field"], "question": "which house is situated on a rock", "label": 1}, {"captions": [" a house with a roof structure and toothbrushes.", " of a small house featuring a flat, brown roof, table with two chairs and a stool, ceiling light, and a window."], "sample_ids": ["7632d1ba4e8144c19484c263b6074d0c", "3a509431d96b43f8a7aebe2846f08b96"], "properties": ["house, roof, toothbrushes", "roof, brown, flat"], "captions_pred_pc": ["a black and white illustration of the letter 'b' isolated on a white background illustration", "a black and white drawing of a snowflake on a white background a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white box with a lot of blades", "a 3d rendering of a table and stool"], "question": "which house has a brown roof", "label": 1}, {"captions": [" a large, rusty, square-shaped metal pillar resembling a rusted tower.", " of an ancient stone bowl, wooden headpiece, and broken pottery pieces."], "sample_ids": ["b5afccae993346079483507296fbb029", "d48b6ff03d6744eb921c41a4a05ff55d"], "properties": ["shape is square, material is metal, color is rusty", "bowl, pottery, headpiece"], "captions_pred_pc": ["above a black and white image of a square frame with dots", "a black and white illustration of a circle made up of many small dots"], "captions_pred_image": ["a 3d model of a concrete column", "a 3d model of a piece of ancient pottery"], "question": "which object is made of stone", "label": 1}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue arrow, exosphere label, england label, blue and purple 
stripes, and a blue flag.", " of a slice of bread and knife on a cutting board."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "0d5f5baa97754547ad517b694ea8edc7"], "properties": ["color, temperature, england", "bread, knife, board"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "above a black and white illustration depicting a galaxy with a black hole in the center"], "captions_pred_image": ["a 3d model of the earth with a rainbow in the sky", "a loaf of bread and a knife on a cutting board"], "question": "which object is on a cutting board?", "label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", " a house with wooden framing and trusses."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "4501794e257c4a8ba60a94757d8e93a9"], "properties": ["color, light, jewels", "frame, trusses, wood"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background royalty free illustration", "a black and white drawing of a window"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a 3d model of a house under construction"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a featuring a kite, paper airplane, and birds flying alongside a red, white, and blue plane in the sky.", " a black and white striped box."], "sample_ids": ["f7bb7dcf3c774149809444d6c7d20ab8", "00fa8accaaad44c780efe0c04ed4a12b"], "properties": ["color, plane, sky", "color, black, white"], "captions_pred_pc": ["a silhouette of a man and a child standing on the edge of a cliff silhouette of a man and a child standing on the edge of a cliff silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a 
silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of", "in 15 words or less a black and white pattern on a white background"], "captions_pred_image": ["a 3d sculpture of a bird in flight on a gray background", "a 3d image of a black and white striped surface"], "question": "which object is black and white?", "label": 1}, {"captions": [" a house featuring a purple roof and a suspended ceiling with a light fixture.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["579b43057ef74bd08d06bdd3e8d973a0", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["roof, purple, suspended", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white image of a piece of paper with a pattern of dots on it", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of the roof of a building royalty free 3d model no.", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a glass bottle with liquid, ice, and a lid, resembling a salt shaker and a human figure.", " a house with a yard, trees, bushes, and surrounding buildings."], "sample_ids": ["bb01282cb8b64470866260455b0b46fa", "7f8942ef51dd4246993a587a12df168c"], "properties": ["liquid, ice, lid", "house, yard, surrounding buildings"], "captions_pred_pc": ["in 15 words or less a black and white illustration 
of a dandelion on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration", "a black and white image of a truck on a white background"], "captions_pred_image": ["a 3d model of a plastic bottle on a white background", "a 3d model of a house in the middle of a field"], "question": "which entity has a yard", "label": 1}, {"captions": [" a pyramid with blue and pink lines, wires, and mesh.", "a white 3d-printed plastic container set with a lid, two small containers, and a hat-like attachment."], "sample_ids": ["a3b2db8d5c6044f88b275839d0cd71bd", "1da865c75a5e4a57a17652975dae5474"], "properties": ["color, shape, material", "color, white, plastic"], "captions_pred_pc": ["a black and white image of a patterned rug", "a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white 
background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a"], "captions_pred_image": ["a 3d model of the pyramid roof royalty-free 3d model preview no.1", "a 3d model of a white box, a white lid, and a white container"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a 3d blue star featuring various text, including \"prchen,\" \"bible chen,\" \"rib chicken,\" and \"birch chen.\"", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["0b712fc68ad44ce8a33592d2b26aac18", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["color, shape, text", "island, mountain, grass"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "a black and white map of the island of malta"], "captions_pred_image": ["a white paper airplane flying over a gray background", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": ["a collection featuring a furnished room, destroyed building, us map, house with roof, flying plane, and a ring with paper.", " a wooden shed with a gray roof."], "sample_ids": ["f13d2d1d78cd49e78f3430abbb251edd", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["collection, room, destroyed, house, roof, plane, 
ring, paper", "roof, color, gray"], "captions_pred_pc": ["a black and white image of a person's face", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white photograph of a person sitting on a couch", "a 3d model of a shed with a gray roof"], "question": "which roof is gray", "label": 1}, {"captions": [" a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["b16fb21cda9a4a21a024df749c2304f4", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["roof, ceiling, hole", "mountainous, landmass, state"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d model of a small house and a tree in the foreground", "a 3d model of a piece of paper"], "question": "which entity is not a state?", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["roof truss, insulation, suspended ceiling", "hat, candy, strawberry"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a black and white image of a person wearing a hat"], "question": "which entity has a floating cup?", "label": 1}, {"captions": ["3d silver sculpture resembling a triangular wing with a white arrow design.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], 
"sample_ids": ["578cb910905042939d876af28e29eb2f", "d81d13362ae04371bb2cba46e4939665"], "properties": ["wing, silver, white", "hat, bow, arrow"], "captions_pred_pc": ["a black and white illustration of a pair of hands", "above a black and white photo of an ice sculpture"], "captions_pred_image": ["a white paper airplane flying against a gray background", "a sculpture of an african man sitting on a pedestal"], "question": "which entity is a figurine?", "label": 1}, {"captions": [" a stone archway in a grassy area.", " a small black, white, and orange walkie-talkie."], "sample_ids": ["15deaa04ed6147559ab078fe90d5d999", "e098706085a44898abbd549693d12a64"], "properties": ["location, area, material", "color is black, white, orange"], "captions_pred_pc": ["a black and white image of a car in the middle of a circle", "of a white and black clutch with a flower design"], "captions_pred_image": ["a 3d model of a stone archway", "a 3d model of a walkie-talkie royalty free 3d model preview no 3"], "question": "which object is made of black", "label": 1}, {"captions": [" a robot head with a helmet.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["55ccc07cea0640788d42448bc6559087", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["helmet, head, robot", "a, material, clay"], "captions_pred_pc": ["of a silver ring with black crystals", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a bust of a robot head royalty free 3d model preview no 2", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" a table with two bowls and a cup on it.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["41842c8d2ebd402da04def3c53c41633", "06a1c233fb444830b577aa06e2c01294"], "properties": ["a, bowl, cup", "house, tree, hill"], "captions_pred_pc": ["a black and white image of two eyes in the sky", "above a black and 
white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of a table on a marble floor royalty free 3d model preview no. 3", "a black and white image of a house in the middle of a field"], "question": "which entity has more trees", "label": 1}, {"captions": ["a featuring a rock with a hole, a piece of metal, a knife, an arrow, and a person near a sand pit.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["b57936676e9d43abb635fa1217992287", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["a, hole, rock", "Wheels, laptop, robot"], "captions_pred_pc": ["a black and white image of a lace belt", "a black and white drawing of a cell phone"], "captions_pred_image": ["a 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon'", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" of a wooden table with metal legs and frame.", " a small house with a roof and door, resembling a shack or shed."], "sample_ids": ["d6d6f13cda4e485fbed0dcd19b9c9314", "f1b557775310478893242180defa4d80"], "properties": ["metal, legs, frame", "shack, roof, door"], "captions_pred_pc": ["a black and white image of a patterned rug", "a black and white 
illustration of a telephone on a white background"], "captions_pred_image": ["a 3d rendering of a white wooden table", "a 3d model of a small house in the middle of a field"], "question": "which entity has a roof and door", "label": 1}, {"captions": [" of a blue and white shelf, cube, and tube.", " a small house on a hill in a field."], "sample_ids": ["a4dd3e2c48224cff9ac2a2b0c813f06a", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["color, shape, material", "house, hill, field"], "captions_pred_pc": ["for a black and white photo of a person sitting on a bench", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d rendering of a gray and white shelf", "a black and white image of a small house"], "question": "which object is made of wood", "label": 1}, {"captions": [" a destroyed house and plane amidst a town with buildings.", " a house with a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["0fd3ddca09194b8f94ef731af3b64a08", "c8936ace72954650b4e2d84246964849"], "properties": ["house, plane, town", "roof, color, pink"], "captions_pred_pc": ["above a black and white drawing of a piece of paper", "a black and white drawing of a toilet"], "captions_pred_image": ["a 3d model of a damaged building", "a 3d model of a house with a roof"], "question": "which house has a roof that is the color pink", "label": 1}, {"captions": ["an orange of a forklift truck in a parking lot.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["32d757fbd29640ffb5aab34925525a29", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["orange, forklift, parking lot", "a room, a cake, a table"], "captions_pred_pc": ["a black and white illustration of a cell phone", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a black and white photograph of 
a forklift", "a 3d rendering of a white room with various items in it"], "question": "which entity has a table?", "label": 1}, {"captions": [" a red, blue, and green striped tower building.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["8668f9e9d1a64b86b31f260b8056cd19", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["color, red, blue, green", "house, roof, blue"], "captions_pred_pc": ["a black and white drawing of a butterfly on a white background a black and white drawing of a butterfly on a white background royalty free illustration", "a black and white illustration of a coffee mug on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a pair of cylindrical towers with a staircase leading up to the top of one of the towers", "a 3d model of a small house and barn"], "question": "which building has a blue roof", "label": 1}, {"captions": ["a 3d printed model of a small white house.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["d62a9376f8be4c7585d15ccf68c51239", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["size, material, color", "a, material, clay"], "captions_pred_pc": ["a black and white image of a building with a lot of dots", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a small white birdhouse", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" a building with yellow and white columns, wooden floor, and a ceiling featuring numerous yellow poles.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["1cf4b8f4e6014d36b6537c6ef52ccb96", "c3a82df41875402285608ef13a55df57"], "properties": ["floor, ceiling, poles", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white drawing of a square with dots on it", "a black and 
white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a small house on an island with trees, shrubs, a pool, and a lake.", "a featuring a lamp, harp, white bowl, and white curved wall."], "sample_ids": ["c8331489fca44685bedfa1bdadf6ccb3", "55bcec23e1b34f0d9d748b4dcc3ea123"], "properties": ["house, lake, pool", "lamp, harp, bowl"], "captions_pred_pc": ["a black and white image of a pattern on a piece of paper", "a black and white illustration of a curved line"], "captions_pred_image": ["a 3d model of a large building", "a 3d model of a harp in a white room"], "question": "which entity has a white curved wall?", "label": 1}, {"captions": ["a white ceramic vase with the words 'happy' and 'fish' written on it.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["243cd2c469984313b1522dca099eefd3", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["color, white, material, ceramic", "door, lock, handle"], "captions_pred_pc": ["a black and white image of a vase in the shape of a fish", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a white vase on a grey background", "a black and white image of a door with a crack in it"], "question": "which object is made of wood", "label": 1}, {"captions": [" of a flat roof with a floor heating system and ceiling featuring wood beams and light.", " a house with a roof, wooden beams, and chimney."], "sample_ids": ["30a0b34d05ae48cf9f9e26eb0d842e50", "be1376023c274bdda995d54f3694157f"], "properties": ["ceiling, beams, light", "roof, beams, chimney"], "captions_pred_pc": ["a black and white drawing of a window", "a black and white drawing of a bathroom with a shower"], "captions_pred_image": ["a 3d model of a brick wall with a roof on top", "a 3d model of a house with 
a roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a medieval stone castle with walls and stairs.", " a house with a yard, trees, bushes, and surrounding buildings."], "sample_ids": ["10bc1bbec4c045f9b15fc2156b5e32ee", "7f8942ef51dd4246993a587a12df168c"], "properties": ["wall, stairs, castle", "house, yard, surrounding buildings"], "captions_pred_pc": ["a castle in the snow a castle in the snow illustration on a white background royalty free illustration", "a black and white image of a truck on a white background"], "captions_pred_image": ["a 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the", "a 3d model of a house in the middle of a field"], "question": "which entity is a house?", "label": 1}, {"captions": [" of a small island featuring a white lighthouse, a fountain, and a grassy crater.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["2a30e69498ff4fd1a33c1fb72286f553", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["lighthouse, fountain, crater", "roof, color, yellow"], "captions_pred_pc": ["a black beanie with sparkles on a white background", "a black and white illustration of a 3d object in the shape of a diamond on a white 
background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of an object on top of a pedestal", "a 3d model of a house with a roof"], "question": "which entity has a roof that is the color of yellow", "label": 1}, {"captions": [" a house with a roof structure and toothbrushes.", "a featuring a flying plane, a destroyed plane, a large airborne ship, a building with a broken roof, and a broken piece of metal."], "sample_ids": ["7632d1ba4e8144c19484c263b6074d0c", "4839e3b998ff4f6a84de50488ffae3ba"], "properties": ["house, roof, toothbrushes", "a, building, roof"], "captions_pred_pc": ["a black and white illustration of the letter 'b' isolated on a white background illustration", "for a black and white drawing of a person holding a pencil"], "captions_pred_image": ["a 3d rendering of a white box with a lot of blades", "a 3d model of the space shuttle"], "question": "which building has a roof", "label": 1}, {"captions": [" a large, multi-floor building with columns, shelves, conveyor table, and a ceiling structure featuring pipes.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["6d773d2b0ed9437ea2b9b352bd8a5c25", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["building, floor, columns", "island, mountain, grass"], "captions_pred_pc": ["in one line a black and white drawing of a dotted pattern on a white background", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d rendering of a large white table with multiple shelves", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": ["a small green and white 3d teapot in the shape of a turtle.", " a small triangular-shaped object."], "sample_ids": ["8df8b28138e040a89303e91518b09d59", "2d02985030804209a26c2c53b96a06f9"], "properties": ["shape is turtle, color is green, white", 
"shape, triangle, small"], "captions_pred_pc": ["above a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black", "a black and white image of a piece of metal"], "captions_pred_image": ["a white ceramic teapot on a gray background", "a black piece of furniture on a white background"], "question": "which object is smaller", "label": 1}, {"captions": [" a small white barn with a metal roof.", " a brick building with a roof structure and roof truss."], "sample_ids": ["4ca3342a96824684845f7d0e062ab176", "84e8acad28664a738df69d719df9e263"], "properties": ["roof, metal, white", "roof, structure, truss"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a house made of dots", "a black and white polka dots pattern on a white background polka dots pattern on a white background illustration"], "captions_pred_image": ["a 3d model of a barn", "a 3d model of a brick building with a roof"], "question": "which 
building has a roof structure and roof truss?", "label": 1}, {"captions": ["royalty-free of a beaver on a skateboard.", " a white and gold mirror on a wooden easel stand."], "sample_ids": ["9b6a637ead444bd28452178c74697653", "0d10d734448d4a5d8d07b938c12d9d80"], "properties": ["image is royalty-free, beaver, skateboard", "color, white, gold"], "captions_pred_pc": ["for a black and white image of a person's hand holding a toothbrush", "for a black and white image of a shoe on a white background"], "captions_pred_image": ["a beaver on a skateboard royalty free 3d model preview no. 1", "a 3d model of a standing mirror on a white background"], "question": "which object is white and gold?", "label": 1}, {"captions": [" a white hospital operating room with blue containers and medical equipment.", " a black and white cube-shaped building with a staircase."], "sample_ids": ["a10f3196831647bc8842eacac640047d", "587e65f2d904440488a98dfa9a4e9dbe"], "properties": ["color, white, containers", "shape is cube, color is black, white"], "captions_pred_pc": ["a black and white illustration of the letter 'f' made up of tiny dots", "above a black and white photograph of a sculpture"], "captions_pred_image": ["a 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d 
model of a hospital room 3d model of a hospital room 3d model of a hospital room", "a black and white 3d model of a building"], "question": "which entity is a cube?", "label": 1}, {"captions": [" of a rock formation with a white cliff and a rock.", " of a wine corkscrew"], "sample_ids": ["4a25f6dfbea943bca137dacd2f7b984f", "07047b273add4f6fb2075fd176a50cd9"], "properties": ["image is rock formation with a white cliff and a rock", "a, type, corkscrew"], "captions_pred_pc": ["above a black and white map of spain on a white background", "a black and white image of a corkscrew"], "captions_pred_image": ["a black and white image of a rock formation on a gray background", "a 3d model of a wine bottle opener"], "question": "which object is a type of corkscrew", "label": 1}, {"captions": [" a small island with trees, water, and a river.", " a small house with a roof and ceiling-mounted air conditioner."], "sample_ids": ["5a9c593092c04deaa0f17a1c28a79476", "6965067ea9e34357a7af21a2f078fbac"], "properties": ["water, river, island", "roof, air conditioner, house"], "captions_pred_pc": ["in 15 words or less a black and white polka dot pattern on a white background", "a black and white illustration of a window"], "captions_pred_image": ["a 3d model of a snowy landscape with trees in the foreground royalty free 3d model preview no.2", "a 3d rendering of a small house with a covered porch"], "question": "which entity is not a small island with trees", "label": 1}, {"captions": [" of a metal tool with a yellow handle, a laptop, and a ceiling light fixture.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["b714bf13e9e54acb867c2c1b3ccf8ae8", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["metal, laptop, light fixture", "island, mountain, grass"], "captions_pred_pc": ["for a black and white image of a corner shelf", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d model of a telescope on a stand", "a 3d image of a 
small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": [" a tall glass tower featuring blue, green, and white squares.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["405d68eda26c401fbcddc7e3a457c74e", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["color, shape, height", "a room, a cake, a table"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a black and white photograph of a metal sculpture on a pedestal", "a 3d rendering of a white room with various items in it"], "question": "which entity has a room?", "label": 1}, {"captions": [" an old building with windows, doors, and a balcony on a street.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["706fb93f885d42f594e0ebbba632d2f2", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["building, balcony, street", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["in 15 words or less a black ink brush stroke on a white background", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of an old building", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": ["a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it.", " of a white chest of drawers with legs."], "sample_ids": ["9bcb7cc44b444326bc426cd9e2aacf60", "f00dfa8b5e7e4fc6bbf97d718b66f390"], "properties": ["- material is plastic- color is green- shape is box", "chest of drawers, legs, white"], "captions_pred_pc": ["a black and white illustration of a toilet brush and toilet brush holder", "of a black and 
white leopard print rug"], "captions_pred_image": ["a 3d rendering of a plastic box with several compartments", "a 3d rendering of a white dresser"], "question": "which object is made of wood", "label": 1}, {"captions": [" a yellow and white structure with yellow poles.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["bada91e216fd486d9e3e356d48978f33", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, shape, poles", "a, material, clay"], "captions_pred_pc": ["a black and white drawing of a dotted pattern on a white background a black and white drawing of a dotted pattern on a white background royalty free illustration", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" a small building with a roof.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["a3089017e4c5463d852de65abf61eda5", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["roof, building, small", "island, terrain, water"], "captions_pred_pc": ["in 15 words or less a black and white image of a building with dots all over it", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a black and white image of a small house", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" of a round birthday cake with a single candle in the middle.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["76c834f15f664dbdb7c08ca1ff936e7c", "06a1c233fb444830b577aa06e2c01294"], "properties": ["candle, color, shape", "house, tree, hill"], "captions_pred_pc": ["a black and white illustration of a shower head", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 
birthday cake with a candle on top royalty free 3d model", "a black and white image of a house in the middle of a field"], "question": "which entity has more trees", "label": 1}, {"captions": [" a small wooden building with a roof, resembling a birdhouse or cabinet.", " of two rocks with ice elements."], "sample_ids": ["a60e4d5f34aa4a6280343a8f15bb1c13", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["house, roof, wooden", "image is a rock with ice elements"], "captions_pred_pc": ["for a black and white image of an object on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a black and white photograph of a birdhouse", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a building with blue metal framing and structure.", " of a small white building with stairs and a lid."], "sample_ids": ["ce40210c2a7e49dfaebbd934ccec4eca", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["color, blue, structure", "building, stairs, lid"], "captions_pred_pc": ["a black and white image of dots on a white background", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d model of a white box on a gray background"], "question": "which building has a lid?", "label": 1}, {"captions": [" a black and red robot with a yellow head, resembling an insect and featuring red and yellow flame-like details.", " a concrete wall with peeling paint and rusted metal features."], "sample_ids": ["84ecc9f87efc4638acb5532ecd55b809", "4376831ff557462dbacc4cce88a8cc86"], "properties": ["color, head, body", "paint, rust, concrete"], "captions_pred_pc": ["a black and white illustration of a teddy bear made of dots on a white background illustration", "above a black and white image of a shelf on a white background"], 
"captions_pred_image": ["a 3d model of a robot in low poly style", "a 3d model of a concrete wall"], "question": "which entity is more likely to have rust", "label": 1}, {"captions": [" a white plastic box/tray with a hole in the middle.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["04f8bfad8ad14795aced8a83ea30ca60", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["color is white, material is plastic, shape is box", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["a black and white image of a rectangular tray on a white background", "a black and white image of a circular object on a white background"], "captions_pred_image": ["a 3d model of a white plastic tray", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": [" a wooden staircase and small table in a room with wooden floor and ceiling.", " a house with a roof and beams."], "sample_ids": ["c6b89316941b4d7c9562cd2ec0bf4706", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["floor, staircase, table", "roof, beams, house"], "captions_pred_pc": ["above a black and white photograph of an object on a white background", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a boat on a wooden surface royalty free 3d model no.3", "a 3d model of a building with a roof"], "question": "which entity has a roof and beams", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", " of a toy mushroom character with a white and brown head."], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "ae8a73809d4647c09cc82f403e47de1d"], "properties": ["color, door, window", "color, head, white and brown"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "a black and white illustration of a 
butterfly sitting on a dandelion stock illustration"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "a 3d model of a gray and white cartoon character"], "question": "which entity has a white and brown head?", "label": 1}, {"captions": [" a multicolored rock with various green, brown, and other hues, featuring a hole.", " a white castle composed of small cubes."], "sample_ids": ["04be3554cef349f2bc631b7f30898228", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["color, shape, material", "composed of, white, cubes"], "captions_pred_pc": ["a map of the country in black ink on a white background royalty free illustration", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a rock on a white background royalty free 3d model preview no 1", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of small cubes", "label": 1}, {"captions": ["a featuring a small island with trees, mountains, a house on a hill, a large building, and a boat floating in the water.", " a dragon flying above a city, accompanied by a train, various objects, animals, and a person walking."], "sample_ids": ["37bdbc633c9545878a98ff47c3029e32", "f69264c33c324343b8a0a35d49ae0942"], "properties": ["a, island, water", "a city, train, dragon"], "captions_pred_pc": ["a black and white photo of a boat in the water", "a black and white image of a toy car"], "captions_pred_image": ["a 3d model of a building with trees surrounding it", "a line drawing of an airplane flying in the sky"], "question": "which entity has a train?", "label": 1}, {"captions": [" of a wooden windmill with a red roof.", " a large white and metal building with a metal roof structure."], "sample_ids": ["2ad8fca30285483d8b7f602fa078215d", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["roof, color, red", "roof, metal, white"], "captions_pred_pc": ["a white and black image of a snowflake in the shape of a snowflake", 
"of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a windmill on a gray background", "a 3d model of a large white box"], "question": "which roof is made of metal", "label": 1}, {"captions": [" of a snowman with a white umbrella and a white coat.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["bbfff7ef6ab14b30bc7b5a3aa8391f95", "bded33af34104b9686b845dfd18309a9"], "properties": ["color, umbrella, coat", "table, staircase, light"], "captions_pred_pc": ["a black and white image of a tree made of dots", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d model of a snowflake on a white background", "a 3d model of a small table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a white and yellow metal shelving unit with a steel structure, yellow legs, and suspended ceiling system.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["c1a7d264b34841409009b3d5d39d5b99", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["Steel, Color, Yellow", "table, staircase, light"], "captions_pred_pc": ["a black and white illustration of a building", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d model of a table with multiple tables and chairs", "a 3d model of a table with a staircase"], "question": "which object is made of wood", "label": 1}, {"captions": [" a small square table with an integrated staircase and a ceiling light.", " a small wooden house with a green roof."], "sample_ids": ["bded33af34104b9686b845dfd18309a9", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["table, staircase, light", "roof, color, green"], "captions_pred_pc": ["above a black and white image of a square with a square in the center", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a 
small table with a staircase", "a 3d model of a house with a ladder"], "question": "which object has a roof", "label": 1}, {"captions": [" of a multi-colored spooling machine with wires and circuit board, featuring a lamp and shelf with toys.", " of a wooden truck with a load of snow and crates, driving on a pile of wood."], "sample_ids": ["0e3f5cc16806492b948d41a748819ce3", "ac1e2dcbe71945d58204f95d16d658b1"], "properties": ["color, shape, material", "load, crates, snow"], "captions_pred_pc": ["a black and white image of a decorative tile", "above a 3d image of a bench with a white background"], "captions_pred_image": ["a 3d rendering of an electronic device on a white surface", "a truck with a pile of snow on top of it"], "question": "which object is made of wood", "label": 1}, {"captions": [" a modern office building with a green door, green roof, windows, and blue lights.", " a large steel building with many columns and a pool."], "sample_ids": ["d6087023095446fbadef1721478373b2", "2ce649a4152a45bab60d8cafa1dcdeb3"], "properties": ["door, roof, window", "building material, pool, steel"], "captions_pred_pc": ["a black and white drawing of a toilet brush", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of an apartment building", "a 3d model of a concrete structure"], "question": "which building has a pool", "label": 1}, {"captions": [" a brown couch with peeling paint and tape on it.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["8da7e0d122f544e2862b4e592988e183", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["brown, paint, tape", "island, mountain, grass"], "captions_pred_pc": ["above a black and white image of a couch with a leopard print pattern", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d model of a couch royalty free 3d model preview no.2", "a 3d image of a small island in the middle of a lake"], "question": "which entity is a mountain?", 
"label": 1}, {"captions": [" a wooden bench with a metal frame.", " a wooden object, including a board, piece of wood, box, and shelf."], "sample_ids": ["c27b018330da406680e58b94266c310b", "c986212445a1466ca7be7b5ac6bea729"], "properties": ["frame, material, wood", "wood, board, shelf"], "captions_pred_pc": ["a black line on a white background", "a black and white drawing of snowflakes on a white background"], "captions_pred_image": ["a 3d model of a bench", "a 3d rendering of a piece of marble"], "question": "which object is made of wood", "label": 1}, {"captions": [" a small village featuring houses, trees, and a winding road.", " a large steel building with a pool."], "sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "72eed67d8d884c819b28e2e95eb48f06"], "properties": ["houses, trees, road", "building material, pool, steel"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "a black and white illustration of a building with a tree growing out of it"], "captions_pred_image": ["a black and white photograph of a small town", "a 3d model of a concrete structure"], "question": "which building is made of steel", "label": 1}, {"captions": ["a white teddy bear, cloud, skull, octopus, and bird in various positions on a gray background.", " a house with a roof structure and toothbrushes."], "sample_ids": ["dd5849aced0443b1b4b38d413f7e06c4", "7632d1ba4e8144c19484c263b6074d0c"], "properties": ["background, color, white", "house, roof, toothbrushes"], "captions_pred_pc": ["a black and white image of a cat's head", "a black and white illustration of the letter 'b' isolated on a white background illustration"], "captions_pred_image": ["a 3d model of an animal skull in white on a gray background", "a 3d rendering of a white box with a lot of blades"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" three differently colored wooden cubes with holographic patterns and a light inside one cube.", " of a hammer 
with a long metal handle."], "sample_ids": ["1be0870f6d324089a3e3b60a029df6f8", "30f4b6bcbbb44f568cab4fd439d05145"], "properties": ["color, material, shape", "handle, metal, long"], "captions_pred_pc": ["of a set of 3 black ceramic tiles on a white background", "a black and white toothbrush on a white background"], "captions_pred_image": ["a 3d rendering of three cubes on a white background", "a hammer with a wooden handle and metal head"], "question": "which object has a long metal handle", "label": 1}, {"captions": [" a house featuring a roof, floor plan, heating system, and ceiling light fixture.", " a small wooden house."], "sample_ids": ["269939560e08432f9e134309b9a1c587", "4cb4dba1237443eb8dc299530fa12521"], "properties": ["floor plan, heating system, ceiling light fixture", "house, material, wood"], "captions_pred_pc": ["a black and white drawing of a house", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made 
of dots"], "captions_pred_image": ["a 3d model of a building with a glass facade", "a 3d model of a small cottage"], "question": "which house is made of wood", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " a large white and metal building with a metal roof structure."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["a box, a cup, a bottle, a jar", "roof, metal, white"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d model of a large white box"], "question": "which entity has a white roof", "label": 1}, {"captions": ["a featuring a small island with a mountain, a lake in the middle, and a bird flying above.", " a long row of steel shelves in a warehouse, featuring a suspended scaffolding system."], "sample_ids": ["e51fead89ae64968b2ca7f4ccb6d3b97", "578fe7a7bd754b889be33aea99cf5050"], "properties": ["a, bird, lake", "a, material, steel"], "captions_pred_pc": ["a black and white photograph of a cloudy sky", "above a black and white image of a rack with multiple shelves"], "captions_pred_image": ["a 3d model of a mountain range in black and white", "a 3d model of a large metal structure"], "question": "which object is made of steel", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", " a sword with a wooden handle."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "d42eec2b8e7644ec9ededcf9f325faad"], "properties": ["a knife, blade, handle", "handle, material, wood"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "a black and white image of a knife on a white background"], "captions_pred_image": ["a piece of white plastic on a gray background", "a black and white image of a sword"], 
"question": "which object has a handle made of wood", "label": 1}, {"captions": [" a glass bottle with liquid, ice, and a lid, resembling a salt shaker and a human figure.", " a spider-like creature with long arms and legs."], "sample_ids": ["bb01282cb8b64470866260455b0b46fa", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["liquid, ice, lid", "arachnid, leg, arm"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a dandelion on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration on a white background dandelion illustration", "a black and white illustration of a spider on a white background"], "captions_pred_image": ["a 3d model of a plastic bottle on a white background", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": [" a small house with stairs and a roof.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["e9305c80010f4e3b9de9789f01a9bee5", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["roof, stairs, house", 
"house, tree, hill"], "captions_pred_pc": ["above a black and white image of a square with dots all over it", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d rendering of a podium on a wooden floor", "a 3d model of a house in the middle of a field"], "question": "which house has a tree in front", "label": 1}, {"captions": [" a white octagonal cylinder with a hole, resembling a recessed light fixture.", " of a white human skull with broken bone elements."], "sample_ids": ["827a77638bfa436f9aec49bf851fe00c", "6ca0f91b85464d7a845b3977351dd0b5"], "properties": ["size, color, shape", "color, white, skull"], "captions_pred_pc": ["a black and white circular ornament on a white background a black and white circular ornament on a white background royalty free illustration", "a black and white image of a cat's x-ray"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a 3d model of a human skull in white"], "question": "which entity is white", "label": 1}, {"captions": [" a spiral staircase with a railing and wooden floor in a house.", " a small wooden house."], "sample_ids": ["40921ffd69db479294554d261daf3035", "4cb4dba1237443eb8dc299530fa12521"], "properties": ["floor, railing, staircase", "house, material, wood"], "captions_pred_pc": ["above a black and white image of a computer screen", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background 
house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots"], "captions_pred_image": ["a 3d model of a spiral staircase", "a 3d model of a small cottage"], "question": "which house is made of wood", "label": 1}, {"captions": ["a white of a gun.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["d7c12235efd1471db5b7145b63dbd11a", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, white, gun", "a, material, clay"], "captions_pred_pc": ["of a white object on a white background", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a white 3d model of a rifle on a gray background", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": ["a featuring a small boat, a rock with a hole, and blue water.", " a stone wall featuring carvings and statues."], "sample_ids": ["7ccdffc0d6404e8d9144260255ea0c5c", "42f663140f834d1ab5f95cd8a5ad04b3"], "properties": ["water, boat, rock", "carving, statue, wall"], "captions_pred_pc": ["a black and white illustration of a surfboard", "a black and white image of a snowflake on a white background"], "captions_pred_image": ["a 3d image of an animal laying on the ground", "a 3d image of a group of statues on a wall"], "question": "which entity is a wall?", "label": 1}, {"captions": ["a featuring a kite, paper airplane, and birds flying alongside a red, white, and blue plane in the sky.", " a small house on a hill in a field."], "sample_ids": 
["f7bb7dcf3c774149809444d6c7d20ab8", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["color, plane, sky", "house, hill, field"], "captions_pred_pc": ["a silhouette of a man and a child standing on the edge of a cliff silhouette of a man and a child standing on the edge of a cliff silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d sculpture of a bird in flight on a gray background", "a black and white image of a small house"], "question": "which entity is in a field?", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", "a featuring a small room with a bunk bed, desk, chair, table, and a blue house."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "dd3a9323ed514ccab330973ff9588015"], "properties": ["yellow, table, roof", "room, bed, desk"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white drawing of a door"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a small room with a 
bunk bed"], "question": "which entity has a bed?", "label": 1}, {"captions": [" a white rocking chair with a curved backrest.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["ee0deb90abf943b6894cd5ded1331213", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["backrest, curved, yes", "hat, skull, bread"], "captions_pred_pc": ["a black and white illustration of a spiral pattern on a white background a black and white illustration of a spiral pattern on a white background royalty free illustration", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d model of a white chair royalty free 3d model no. 3", "a black and white image of a stone sculpture"], "question": "which entity has a skull with a blue hat?", "label": 1}, {"captions": [" a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light.", " a small house with a roof and ceiling-mounted air conditioner."], "sample_ids": ["5ea0962b100b4fccb761ed84afe027b5", "6965067ea9e34357a7af21a2f078fbac"], "properties": ["house, table, chair", "roof, air conditioner, house"], "captions_pred_pc": ["above a black and white photograph of an open door", "a black and white illustration of a window"], "captions_pred_image": ["a 3d rendering of a small white table with a chair", "a 3d rendering of a small house with a covered porch"], "question": "which house has a roof?", "label": 1}, {"captions": [" a yellow and white structure with yellow poles.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["bada91e216fd486d9e3e356d48978f33", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["color, shape, poles", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white drawing of a dotted pattern on a white background a black and white drawing of a dotted pattern on a white background royalty free illustration", "above a black and white image of a 
person sitting on a bench"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a white 3d model of a city skyline"], "question": "which entity is a shelf?", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", " a white fox."], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "faf57e4fc9e44f34b19d2538d5138519"], "properties": ["resembles, toy, bookshelf", "color, white, fox"], "captions_pred_pc": ["a black and white image of a book cover", "above a black and white image of a fish"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "a 3d model of a white fox on a gray background"], "question": "which entity is white", "label": 1}, {"captions": [" of a white and brown sea shell with a hole and small pearls on it, resembling a sea urchin.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["411c164757fc4de68dfecb35fa858223", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["resembles, sea urchin, shell", "a, material, clay"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background 
illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a sea urchin", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": ["a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["ef8288c9fdfc4e0f9c1fe25d570a104e", "b896a0898efe4059a776193c02132129"], "properties": ["color is white, yellow, plastic", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white image of a metal bowl with dots", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a white plastic container with a label on it", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" of a black and white coffee table with a laptop, featuring a performance studio and ceiling grid structure.", " a house with a roof, roof truss, and suspended ceiling structure."], "sample_ids": ["a177693cc8c7428292680816001b48c6", "5abf69f79b92484fb54d41ff0c0a2c11"], "properties": ["black, white, laptop", "roof, truss, suspended ceiling"], "captions_pred_pc": ["a black and white drawing of a room with dots on the floor", "a suitcase made of dots on a white background a suitcase made of dots on a white background royalty free illustration"], "captions_pred_image": ["a dishwasher with a dish inside it", "a 3d model of a house with roof trusses"], "question": "which entity has a ceiling structure", "label": 1}, {"captions": [" of a white spiral, earbuds, ceiling light, and silver ring with black and white 
scissors and design.", "a 3d wooden toy castle with red and yellow towers, a red roof, and a man inside."], "sample_ids": ["c69f60b389124ad9b4f81c64ec332054", "311f6655ed854899b07ea10f3613ef7a"], "properties": ["earbuds, light, ring", "a, color, red"], "captions_pred_pc": ["a black and white drawing of a needle and thread", "a black and white drawing of a wallet on a white background"], "captions_pred_image": ["a black and white illustration of a pair of sunglasses and a pair of scissors next to each other on a white background", "a 3d model of a castle with two towers"], "question": "which entity has a red roof", "label": 1}, {"captions": [" a white car alternator, motor, and fan with its parts.", " a small house with a roof and door, resembling a shack or shed."], "sample_ids": ["4f3258f5264b46b1b839d992ca1bad68", "f1b557775310478893242180defa4d80"], "properties": ["color, alternator, fan", "shack, roof, door"], "captions_pred_pc": ["a black and white image of an object on a white background", "a black and white illustration of a telephone on a white background"], "captions_pred_image": ["a 3d model of a fan and its components on a white background", "a 3d model of a small house in the middle of a field"], "question": "which entity is a shack?", "label": 1}, {"captions": [" a spiral staircase with a railing in a small building.", " a wooden staircase with a red and white railing and a red arrow."], "sample_ids": ["28cae056856c4a8ba9d1a6af5355f831", "bb8bb4c9972d4b718b8bbd3ed5fdd14d"], "properties": ["staircase, railing, building", "arrow, red, white"], "captions_pred_pc": ["a black and white photograph of a light switch", "above a black and white image of a square with a white cross in the center"], "captions_pred_image": ["a 3d model of a staircase in a white room", "a 3d model of a spiral staircase"], "question": "which staircase has a red arrow?", "label": 1}, {"captions": [" a leather recliner chair and ottoman set, featuring swivel functionality and 
available in modern orange, tan, and brown colors.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["44be138ae8e2409bbbca44a96fc67d45", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["color, tan, brown, orange", "door, lock, handle"], "captions_pred_pc": ["above a black and white illustration of an office chair", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a grey leather lounge chair with ottoman and footstool", "a black and white image of a door with a crack in it"], "question": "which entity has a lock", "label": 1}, {"captions": [" of a small island featuring a white lighthouse, a fountain, and a grassy crater.", "a white of a spaceship and building."], "sample_ids": ["2a30e69498ff4fd1a33c1fb72286f553", "bf7d4277c9184d35abdec85bd5e25956"], "properties": ["lighthouse, fountain, crater", "image, building, spaceship"], "captions_pred_pc": ["a black beanie with sparkles on a white background", "a black and white drawing of a tree"], "captions_pred_image": ["a black and white image of an object on top of a pedestal", "a 3d model of a white object on a gray background"], "question": "which image shows a spaceship and building?", "label": 1}, {"captions": ["a low poly of a plant on a white object, resembling a paper or plastic bag.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["d49d8ed244094349a99e4faca05e0690", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["low poly, plant, white", "roof, color, yellow"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free 
illustration"], "captions_pred_image": ["a 3d model of a plant growing out of a rock", "a 3d model of a house with a roof"], "question": "which object is white", "label": 1}, {"captions": [" a wooden staircase and small table in a room with wooden floor and ceiling.", " of a small white building with stairs and a lid."], "sample_ids": ["c6b89316941b4d7c9562cd2ec0bf4706", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["floor, staircase, table", "building, stairs, lid"], "captions_pred_pc": ["above a black and white photograph of an object on a white background", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a boat on a wooden surface royalty free 3d model no.3", "a 3d model of a white box on a gray background"], "question": "which entity has a lid?", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", " a small white building featuring a room with a door and a white shelf."], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "d7b78fa9a6b64f6095b881bc619b04fe"], "properties": ["color, door, window", "room, door, shelf"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "above a black and white illustration of a person standing in front of a door"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "a 3d model of an empty room"], "question": "which building has a room with a door and a white shelf?", "label": 1}, {"captions": [" a house with a pink roof, truss, and a square white ceiling lamp.", " a spider-like creature with long arms and legs."], "sample_ids": ["91b2e9e4660946f5b4808a18b5323b69", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["roof, truss, lamp", "arachnid, leg, arm"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white illustration of a spider on a white background"], "captions_pred_image": 
["a 3d model of a house with a metal roof", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": [" of a white plastic bar, resembling a shelf or towel rod, with a metal light fixture.", " a futuristic, small spaceship with two propellers, resembling a jet fighter."], "sample_ids": ["f84dec547a3d4710b48db11fc9fa489a", "832a022cdcc74763b0571e04af4e592b"], "properties": ["light source, fixture, color", "resembles a jet fighter, futuristic, small"], "captions_pred_pc": ["a black and white photo of a small square on a white background", "a black and white illustration of a person's hand holding a pencil"], "captions_pred_image": ["a 3d rendering of a white object on a gray background", "a lego model of a futuristic airplane"], "question": "which entity is a spaceship?", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", "a white ceramic vase with the words 'happy' and 'fish' written on it."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "243cd2c469984313b1522dca099eefd3"], "properties": ["color, shape, and size", "color, white, material, ceramic"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a black and white image of a vase in the shape of a fish"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a white vase on a grey background"], "question": "which object is white", "label": 1}, {"captions": [" a white lighthouse building with a clock tower, rocket, and ladder.", " a white building with a red roof."], "sample_ids": ["88eba412c78a4ced89eb857327653f6c", "1f9580be397d4f948bf53fe1d5bc5756"], "properties": ["lighthouse, building, tower", "color, white, roof, red"], "captions_pred_pc": ["a black and white illustration of a water droplet on a white background", "in 15 words or fewer a black and white drawing of a camera"], "captions_pred_image": ["a 3d model of a white sofa on a white 
background", "a 3d model of an office building"], "question": "which building is white", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["a box, a cup, a bottle, a jar", "Wheels, laptop, robot"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white drawing of a cell phone"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a small white house with a roof."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["roof truss, insulation, suspended ceiling", "roof, color, white"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a building with a white roof"], "question": "which house has a white roof", "label": 1}, {"captions": ["a featuring white and red cubes, and a pink and white chair.", " a small house with a roof and door, resembling a shack or shed."], "sample_ids": ["f2c44a82ba744ba8b93e9a1c2272c117", "f1b557775310478893242180defa4d80"], "properties": ["color, white, red, pink", "shack, roof, door"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white illustration of a telephone on a white background"], "captions_pred_image": ["a 3d model of a white structure with stairs", "a 3d model of a small house in the middle of a field"], 
"question": "which entity is a shack?", "label": 1}, {"captions": [" of a wooden windmill with a red roof.", " a house with a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["2ad8fca30285483d8b7f602fa078215d", "c8936ace72954650b4e2d84246964849"], "properties": ["roof, color, red", "roof, color, pink"], "captions_pred_pc": ["a white and black image of a snowflake in the shape of a snowflake", "a black and white drawing of a toilet"], "captions_pred_image": ["a 3d model of a windmill on a gray background", "a 3d model of a house with a roof"], "question": "which roof is pink", "label": 1}, {"captions": [" a building featuring yellow columns, a yellow roof, and a wooden structure.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["0ce6a4102f4f40e2a0084938b0a93941", "3cd410c4359a4cef98702963a2b9802b"], "properties": ["structure, columns, roof", "roof, trusses, ladder"], "captions_pred_pc": ["a black and white drawing of a window", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d model of the roof of a building"], "question": "which entity has a roof with wooden trusses?", "label": 1}, {"captions": [" a small bedroom with wooden floors, walls, roof, and shelf.", "a featuring a room with a staircase, a damaged bus and rv, two houses, a large white box, and destroyed buildings with debris."], "sample_ids": ["e602ac60041f4b4f84c044161e478781", "a47dcf3d3cf34c58af17b6715d6f1232"], "properties": ["floor, wall, roof", "room, staircase, bus"], "captions_pred_pc": ["above a black and white image of a decorative metal bar", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a room with wooden walls and a rug on the floor", "a 3d image of a building with a lot of debris"], "question": "which entity has a damaged bus?", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, 
resembling a hybrid of an octopus, squid, spider, and robot.", " of a cherry blossom bonsai tree with pink flowers."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "037fff0f153c41ea8b9c9392c2e2439a"], "properties": ["resembles, octopus, squid, spider, robot", "flower, color, pink"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "for a black and white illustration of a person on a skateboard"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a 3d model of a bonsai tree on a pedestal"], "question": "which entity has pink flowers", "label": 1}, {"captions": ["a 3d white object resembling a sphere, frog, egg, hat, and shell.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["0a8e0b95d8ce43ee9159ad01d925aad8", "bf18bfd89efd43389781050230467d58"], "properties": ["shape is sphere, color is white, material is plastic", "Lights, number, five"], "captions_pred_pc": ["a black and white illustration of a sponge in the shape of a sponge sponge black and white illustration of a sponge in the shape of a sponge royalty free illustration", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d sculpture of an apple on a white background", "a white chandelier with five white shades"], "question": "which object has more lights", "label": 1}, {"captions": [" a white motorcycle with wings.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["7e684a7c012c4fd0ac91844f22457640", "b896a0898efe4059a776193c02132129"], "properties": ["color, white, wings", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a motorcycle on a white background", "a 3d model 
of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a house with wooden framing and trusses.", " a small white house with a roof."], "sample_ids": ["4501794e257c4a8ba60a94757d8e93a9", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["frame, trusses, wood", "roof, color, white"], "captions_pred_pc": ["a black and white drawing of a window", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a building with a white roof"], "question": "which house has a roof that is white", "label": 1}, {"captions": [" of a metal tool with a yellow handle, a laptop, and a ceiling light fixture.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["b714bf13e9e54acb867c2c1b3ccf8ae8", "7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["metal, laptop, light fixture", "water, boat, rock"], "captions_pred_pc": ["for a black and white image of a corner shelf", "a black and white illustration of a surfboard"], "captions_pred_image": ["a 3d model of a telescope on a stand", "a 3d image of an animal laying on the ground"], "question": "which entity has a hole in it?", "label": 1}, {"captions": [" a red circular object with a checkered pattern, resembling a round pillow or bed cover.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["9cf9fb6d07084488892422a5a5be00ef", "c3a82df41875402285608ef13a55df57"], "properties": ["pattern, color, shape", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black circle on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a round cushion royalty-free 3d model preview", "a white plastic object on a gray background"], "question": "which object is white?", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a 
sword, gun, broomstick, and stick.", "a featuring a fireplace with a broom, shovel, and other items, a man and a dog, a table with a bucket, and a vase with a pipe."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "fdcbb46224a44faca5c3fb20264eb2e7"], "properties": ["hat, sword, gun", "broom, shovel, fireplace"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "a close-up view of a white plastic bag with a small hole in it"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d model of an outdoor fireplace"], "question": "which entity has a fireplace?", "label": 1}, {"captions": [" of white spheres resembling a molecule.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["9d2c94d03ca745948b8cb4e8cafddb1c", "a17477b445b3443189dad22f768b888b"], "properties": ["color, shape, number", "roof, pillar, stairs"], "captions_pred_pc": ["of a black and white 3d model of a molecule on a white background a black and white 3d model of a molecule on a white background royalty free illustration", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d sculpture of a white ball on a gray background", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": ["a collection featuring a black umbrella, a hammer, a door, and an axe, all with wooden handles.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["c6e87b303e9945a3b0fc282e8527b473", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["collection, color, black, handle, wooden", "door, lock, handle"], "captions_pred_pc": ["a black and white photo of a person in a suit and tie standing in front of a white wall", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a black and 
white image of a door handle and a knife", "a black and white image of a door with a crack in it"], "question": "which door has a lock and handle", "label": 1}, {"captions": [" a white staircase with a black railing in a room.", "a white of a spaceship and building."], "sample_ids": ["d05c1b6047e145a4906c467a9ebe7430", "bf7d4277c9184d35abdec85bd5e25956"], "properties": ["color, white, railing, black", "image, building, spaceship"], "captions_pred_pc": ["above a black and white image of a computer screen", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d rendering of a white staircase with a stainless steel handrail", "a 3d model of a white object on a gray background"], "question": "which image shows a spaceship and building?", "label": 1}, {"captions": ["purple speech bubble and circle", " a white motorcycle with wings."], "sample_ids": ["382b2c7ab4e14027a89be90966795733", "7e684a7c012c4fd0ac91844f22457640"], "properties": ["color, shape, size", "color, white, wings"], "captions_pred_pc": ["a black and white speech bubble on a white background", "a black and white image of a pair of sunglasses"], "captions_pred_image": ["a gray circle on a gray background with a small white dot in the center of the circle", "a 3d model of a motorcycle on a white background"], "question": "which object is white", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " a gray object featuring an axe, guitar headstock, and head."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "ac5c86f38c8e4570a7eefff0958185cf"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "Headstock, Guitar, Head"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "a black and white image of a toothbrush on a white background"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d 
printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d", "a 3d model of an axe head"], "question": "which object has a headstock", "label": 1}, {"captions": [" a bridge structure with a pier, stairway, railings, and red and green elements.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["23701a9674204ea881fb8af9914b1924", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["color, pier, railings", "mountainous, landmass, state"], "captions_pred_pc": ["a black and white illustration of a computer keyboard", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d model of a staircase in the middle of a field", "a 3d model of a piece of paper"], "question": "which entity is a landmass", "label": 1}, {"captions": [" of a flat roof with a floor heating system and ceiling featuring wood beams and light.", " of a flat roof with a floor heating system and ceiling featuring wood beams and light."], "sample_ids": ["30a0b34d05ae48cf9f9e26eb0d842e50", "30a0b34d05ae48cf9f9e26eb0d842e50"], "properties": ["ceiling, beams, light", "ceiling, beams, light"], "captions_pred_pc": ["a black and white drawing of a window", "a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a brick wall with a roof on top", "a 3d model of a brick wall with a roof on top"], "question": "which entity has a floor heating system?", "label": 0}, {"captions": ["a featuring white and red cubes, and a pink and white chair.", " a white and blue building with 
a black roof."], "sample_ids": ["f2c44a82ba744ba8b93e9a1c2272c117", "c893118316ee43e18322e5964b2806c5"], "properties": ["color, white, red, pink", "color, white, blue, roof, black"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white illustration of a person standing on top of a hill made up of tiny dots"], "captions_pred_image": ["a 3d model of a white structure with stairs", "a 3d model of a white building on a gray background royalty free 3d model no."], "question": "which entity has a black roof", "label": 1}, {"captions": [" a house with a yard, trees, bushes, and surrounding buildings.", "a 3d white axe, hammer, and spoon."], "sample_ids": ["7f8942ef51dd4246993a587a12df168c", "96d127abd21049689918e671ec613ef8"], "properties": ["house, yard, surrounding buildings", "axe, hammer, spoon"], "captions_pred_pc": ["a black and white image of a truck on a white background", "of a black lace belt on a white background"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe"], "question": "which object is not a house?", "label": 1}, {"captions": [" of a large black mat with square grid design.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["72aac2e9ccd7482eb88e5e4bc204fbf3", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["size, 
color, design", "roof, color, yellow"], "captions_pred_pc": ["a black and white pattern on a white background", "a black and white drawing of a room"], "captions_pred_image": ["a 3d rendering of a black rubber mat on a gray surface", "a 3d model of a table and chairs on a white background"], "question": "which entity has a yellow roof", "label": 1}, {"captions": [" a molecule featuring green, red, and blue spheres.", " a clay pot with holes in it."], "sample_ids": ["1c0e821eb7c4489dbff9e20d7e8575a3", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["color, sphere, molecule", "hole, material, clay"], "captions_pred_pc": ["a black and white photograph of a group of geometric shapes arranged in the shape of a diamond", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a molecule in the shape of a pyramid", "a clay sculpture of a face with holes in it"], "question": "which entity is made of clay", "label": 1}, {"captions": [" of a wooden building frame with truss and roof structure.", " a small white house with a roof."], "sample_ids": ["1313f8185cf24f3bbd73ff4e4ddfab3e", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["frame, truss, roof", "roof, color, white"], "captions_pred_pc": ["a black and white image of a ladder on a white background", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d rendering of a bridge over a road", "a 3d model of a building with a white roof"], "question": "which entity has a roof that is white", "label": 1}, {"captions": ["a white of a spaceship and building.", " a large, black and white circular building, resembling a stadium or ring structure."], "sample_ids": ["bf7d4277c9184d35abdec85bd5e25956", "67f46bb0048244c687a58d1017a08f6b"], "properties": ["image, building, spaceship", 
"building, color, black and white"], "captions_pred_pc": ["a black and white drawing of a tree", "the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustr"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a 3d model of a circular fence with black and white stripes"], "question": "which building is black and white", "label": 1}, {"captions": [" a small house with a roof.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["0d2246e433ce4066b76489f17ba8d694", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["roof, house, small", "house, roof, wooden"], "captions_pred_pc": ["a black and white square made up of small dots on a 
white background", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a house with a triangular roof", "a black and white photograph of a birdhouse"], "question": "which house has a wooden roof", "label": 1}, {"captions": [" of wooden blocks with \"mara ruiz\" logo.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["058c420ee2094bfb8fe19a3d3feb6b67", "4a889132cc444d10bfcbf6c760984416"], "properties": ["size, material, logo", "a, color, white"], "captions_pred_pc": ["in the title the logo for maria ruiz", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d rendering of a wooden corner post", "a 3d model of a desk and chair"], "question": "which object is white", "label": 1}, {"captions": ["a 3d silver knight's helmet.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["83c36d2cebd94939adbb0982fe547f5f", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["- material is metal- color is silver- texture is textured", "island, mountain, grass"], "captions_pred_pc": ["a black and white illustration of a heart shaped object", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d model of a medieval knight's helmet royalty free 3d model preview no 3", "a 3d image of a small island in the middle of a lake"], "question": "which entity is made of grass", "label": 1}, {"captions": [" a black and white striped box.", " a white building with a red roof."], "sample_ids": ["00fa8accaaad44c780efe0c04ed4a12b", "1f9580be397d4f948bf53fe1d5bc5756"], "properties": ["color, black, white", "color, white, roof, red"], "captions_pred_pc": ["in 15 words or less a black and white pattern on a white background", "in 15 words or fewer a black and 
white drawing of a camera"], "captions_pred_image": ["a 3d image of a black and white striped surface", "a 3d model of an office building"], "question": "which entity has a white roof", "label": 1}, {"captions": ["a small green and white 3d teapot in the shape of a turtle.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["8df8b28138e040a89303e91518b09d59", "b896a0898efe4059a776193c02132129"], "properties": ["shape is turtle, color is green, white", "- material is stone, metal, concrete"], "captions_pred_pc": ["above a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a white ceramic teapot on a gray background", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a tree stump with roots.", "a featuring 
a phone booth, desk, chair, lamp, ladder, and two additional chairs."], "sample_ids": ["fa6e88ba438f4597a7230789124c8147", "7da804ad2b554c9a9915d775afb015d3"], "properties": ["root, tree, stump", "desk, chair, lamp"], "captions_pred_pc": ["above a black and white illustration of a galaxy", "a black and white illustration of a city skyline"], "captions_pred_image": ["a 3d model of a tree stump in the middle of a field", "a 3d rendering of a desk and chair in a room"], "question": "which object has more chairs", "label": 1}, {"captions": [" a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole.", " a small white house with a roof."], "sample_ids": ["b16fb21cda9a4a21a024df749c2304f4", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["roof, ceiling, hole", "roof, color, white"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a small house and a tree in the foreground", "a 3d model of a building with a white roof"], "question": "which house has a roof that is white?", "label": 1}, {"captions": [" a black and purple cylinder with a map, purple flower, and shattered pieces.", " of two rocks with ice elements."], "sample_ids": ["5b75108921124a39a71d9bc53d41d245", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["color, shape, material", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white abstract explosion on a white background a black and white abstract explosion on a white background royalty free illustration", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d rendering of a black cylinder with white writing on it", "a 3d image of two rocks on a gray surface"], "question": "which entity is a rock?", "label": 1}, {"captions": [" of a person in a green outfit, resembling a cartoon character.", " of two rocks with ice elements."], 
"sample_ids": ["839a873262a544c9906f3f5799ca4648", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["a, outfit, green", "image is a rock with ice elements"], "captions_pred_pc": ["a person with arms outstretched 3d illustration of a person with arms outstretched on a white background illustration", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a person jumping on a trampoline in slow motion", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" of a red and white pokeball with the word \"pichu\" on it, compatible with various modeling and animation software.", "a featuring a building, a coin, a small black box, and a ball."], "sample_ids": ["7d76c44cb9dd4948b8766d83994ca5f3", "949cf1a57aea45d18261e980b21b8c35"], "properties": ["- material is stl, obj, fbx- size is 240px- color is red, white", "a, building, coin, ball"], "captions_pred_pc": ["a black and white circular pattern on a white background", "a black and white illustration of a triangular shaped object"], "captions_pred_image": ["a close-up view of a black ball with a screw in the center", "a 3d model of a box with a coin next to it"], "question": "which object is not a ball?", "label": 0}, {"captions": ["a 3d-printed blue ring with holes in it.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["9adb0b6d88ec466d8df8c84ead5186a1", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["color, material, shape", "door, lock, handle"], "captions_pred_pc": ["a black and white image of a leopard print design on a white background", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d rendering of a circular object with multiple holes in it", "a black and white image of a door with a crack in it"], "question": "which object is made of wood", "label": 1}, {"captions": [" of a red steel 
playground structure with yellow rails, featuring a bench and storage rack.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["91e069e84f754aceb99e28541cf7ae39", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, bench, rack", "island, terrain, water"], "captions_pred_pc": ["of a 3d illustration of a black and white square frame", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a leather recliner chair and ottoman set, featuring swivel functionality and available in modern orange, tan, and brown colors.", " a leather recliner chair and ottoman set, featuring swivel functionality and available in modern orange, tan, and brown colors."], "sample_ids": ["44be138ae8e2409bbbca44a96fc67d45", "44be138ae8e2409bbbca44a96fc67d45"], "properties": ["color, tan, brown, orange", "color, tan, brown, orange"], "captions_pred_pc": ["above a black and white illustration of an office chair", "above a black and white illustration of an office chair"], "captions_pred_image": ["a grey leather lounge chair with ottoman and footstool", "a grey leather lounge chair with ottoman and footstool"], "question": "which set has a brown ottoman?", "label": 0}, {"captions": [" a small white building with a door, resembling a box-shaped house.", " a large white and metal building with a metal roof structure."], "sample_ids": ["1b5fe88d0ff149ae9d8b4eb455c5c90c", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["shape is box, color is white, door is present", "roof, metal, white"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a white, open shelving unit", "a 3d model of a large white box"], 
"question": "which building has a metal roof", "label": 1}, {"captions": [" of a white rectangular shelf or light fixture on a gray background.", " of a white plastic tube with a hole and a chip on it."], "sample_ids": ["a84221f27ed0416280f8e67563f95ed7", "9968e06a62e8487ea33460e640abc573"], "properties": ["background, color, white", "color is white, material is plastic, shape is tube"], "captions_pred_pc": ["a black line on a white background", "a black and white image of a broom on a stand"], "captions_pred_image": ["a long white plastic strip on a gray background", "a white object on a gray background"], "question": "which object is whiter", "label": 1}, {"captions": ["\"carl meyer logo with various text variations\"", "a featuring a fireplace with a broom, shovel, and other items, a man and a dog, a table with a bucket, and a vase with a pipe."], "sample_ids": ["eb0d2ef8bc364b1492d1e347e5f38d47", "fdcbb46224a44faca5c3fb20264eb2e7"], "properties": ["color, shape, text", "broom, shovel, fireplace"], "captions_pred_pc": ["the logo for carl meyer", "a close-up view of a white plastic bag with a small hole in it"], "captions_pred_image": ["the word 'gil heyer' is written in white letters on a gray background", "a 3d model of an outdoor fireplace"], "question": "which entity has a fireplace?", "label": 1}, {"captions": [" a large bridge over a highway, with an airport and train station nearby.", "a white of a house with a hole in the ceiling."], "sample_ids": ["b348fddc913f47df93cf35db302427d0", "2915cbd03e164ac0bb13866c2d68cc65"], "properties": ["location, highway, train station", "image, house, ceiling"], "captions_pred_pc": ["a black and white photo of a small square on a white background", "above a black and white drawing of a house"], "captions_pred_image": ["an aerial view of a city with a highway in the foreground and buildings in the background", "a 3d model of a house with a balcony"], "question": "which image shows a house with a hole in the ceiling?", 
"label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " of a person breaking through a brick wall."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "0708be2186d24d52815e8ac9d751fc37"], "properties": ["color, white, black, white", "image, brick, wall"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "for a black and white illustration of an ice cream cone"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of a person breaking through a brick wall"], "question": "which image shows a wall made of bricks?", "label": 1}, {"captions": [" of a wooden nightstand with a hexagonal pattern, white top, and drawer.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["e6f2dbec6d464b4da4aa47bce242f6e5", "6b745457e06840119058883b35f78f58"], "properties": ["Drawer, Pattern, White", "roof, color, blue"], "captions_pred_pc": ["a cross made of dots on a white background vector illustration of a cross made of dots on a white background royalty free illustration illustration of a cross made of dots on a white background vector illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background 
illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d rendering of a wooden box with a handle", "a 3d model of a house with a steeple on top"], "question": "which object has a blue roof", "label": 1}, {"captions": ["a 3d purple sphere with a star pattern.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["752942069dcf4969a43af1e4fa9c4b4d", "b896a0898efe4059a776193c02132129"], "properties": ["color, shape, pattern", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white drawing of an object with dots on it", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a black sphere on a gray background", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": ["a 3d object featuring a flat roof with blue and white pattern, a purple and white bench, a blue and purple striped runner, a bed with blue and purple stripes, blue and white striped paper, a bench with a blue and purple pattern, and a ceiling light fixture with wooden slats.", " a small white house with a roof."], "sample_ids": ["28d0cf71ed684dcb87b2c9b2744c3633", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["runner, bed, bench", "roof, color, white"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a creative black and white drawing of a snowflake on a white background royalty free illustration", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a long, curved structure", "a 3d model of a building with a white roof"], 
"question": "which entity has a roof", "label": 1}, {"captions": [" a pink, white, and blue stick-like object with arrow and syringe features.", "white of a rhino head with horns."], "sample_ids": ["38af8affd4f744a08e8df9770a458709", "8481aade84de47cab1a9accf8067e678"], "properties": ["color, shape, material", "image, rhino, head"], "captions_pred_pc": ["of a black and white wrist watch on a white background", "of a penguin skull in black and white"], "captions_pred_image": ["a 3d rendering of a white object on a gray background", "rhino head 3d model royalty free 3d model preview no 3"], "question": "which entity is a white image of a rhino head?", "label": 1}, {"captions": [" a flying bird, resembling a crow and a pigeon.", " of two rocks with ice elements."], "sample_ids": ["5ec78c8b6ab54f739adb0b46d216a454", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["bird, resembles, crow, pigeon", "image is a rock with ice elements"], "captions_pred_pc": ["above a black and white illustration of an airplane on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a black and white image of a bird in flight", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "c3a82df41875402285608ef13a55df57"], "properties": ["roof, color, blue", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["of a white lace clutch purse on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a building with many windows", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" of a white chair with arms and legs.", " of a house with a 
roof structure, including a greenhouse."], "sample_ids": ["61ec56afad7a45deb99ccf1ab1bd2d73", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["Arms, Legs, Color", "roof, structure, greenhouse"], "captions_pred_pc": ["of a 3d rendering of a chair in the style of the 1920s and 1930s", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white outdoor chair royalty free preview no 3", "a 3d model of a building with a roof"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" a snowy small village with farm buildings and a fence.", "a white of a spaceship and building."], "sample_ids": ["6bb669534ccc434f9ab4d7b39bae3510", "bf7d4277c9184d35abdec85bd5e25956"], "properties": ["building, fence, snowy", "image, building, spaceship"], "captions_pred_pc": ["a black and white drawing of a boat on the water", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a small village in the snow royalty free 3d model preview no. 
3", "a 3d model of a white object on a gray background"], "question": "which image shows a spaceship and building?", "label": 1}, {"captions": [" of a house with a roof and a room featuring a pink ceiling.", " a snowy mountain island."], "sample_ids": ["9c9f4e7f7c9442df99a9dc41870083c5", "fe8520ec650248fa92ba07bb95712b09"], "properties": ["roof, room, ceiling", "mountain, island, snowy"], "captions_pred_pc": ["a black and white drawing of the letter 'l' on a white background", "for a black cross on a white background"], "captions_pred_image": ["a 3d model of a building with a roof and walls", "a black and white photograph of a volcanic eruption"], "question": "which entity is a mountain?", "label": 1}, {"captions": [" a white building with a square ceiling panel and a white 3d printed plane on top.", " a wooden object, including a board, piece of wood, box, and shelf."], "sample_ids": ["eb3ea0e6963f4efda2a8cf0732befd56", "c986212445a1466ca7be7b5ac6bea729"], "properties": ["- material is 3d printed, ceiling panel is square, plane is white", "wood, board, shelf"], "captions_pred_pc": ["above a black and white drawing of a cross", "a black and white drawing of snowflakes on a white background"], "captions_pred_image": ["a 3d model of a building on a white surface", "a 3d rendering of a piece of marble"], "question": "which object is made of wood", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a small, snow-covered house."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "0d00d10b90134dbe9ce7b2b3d6669237"], "properties": ["yellow, table, roof", "house, snow, cover"], "captions_pred_pc": ["a black and white drawing of a floor plan", "in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white structure with 
multiple tables and chairs", "a piece of broken glass on a white background"], "question": "which house is covered in snow", "label": 1}, {"captions": [" a small island with trees, water, and a river.", "a featuring a chair, a building, a glass tower, and a throne made of money, with various unique elements such as a green roof and a computer chip."], "sample_ids": ["5a9c593092c04deaa0f17a1c28a79476", "18d2e75f23474d7489a6d7d605dfc76d"], "properties": ["water, river, island", "throne, chair, building"], "captions_pred_pc": ["in 15 words or less a black and white polka dot pattern on a white background", "a black and white illustration of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a snowy landscape with trees in the foreground royalty free 3d model preview no.2", "a 3d model of a building on top of a table"], "question": "which entity has a throne made of money", "label": 1}, {"captions": ["a 3d green toy dinosaur with a purple hat, horn, and accents, featuring a purple flower.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["f8cfe5430a8a4431aacf32f49b20220d", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["color, hat, flower", "camera, speaker, ceiling fan"], "captions_pred_pc": ["for a 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of 
the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a purse on a white background royalty-free 3d model preview no. 3", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": ["a collection featuring a broken egg in a bowl, food wrapped in paper, torn and whole paper pieces, a white paper hat, a skull with a hat, a snowy iceberg, and a piece of cake.", " of a small island featuring a white lighthouse, a fountain, and a grassy crater."], "sample_ids": ["3d7bd392c9a14f4ab7c0aae2cf75b487", "2a30e69498ff4fd1a33c1fb72286f553"], "properties": ["hat, food, bowl", "lighthouse, fountain, crater"], "captions_pred_pc": ["in 15 words or less a black and white image of dots on a white background", "a black beanie with sparkles on a white background"], "captions_pred_image": ["a 3d model of the earth with a hole cut out of it", "a black and white image of an object on top of a pedestal"], "question": "which entity has a lighthouse?", "label": 1}, {"captions": [" a small village featuring houses, trees, and a winding road.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["houses, trees, road", "table, staircase, light"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a black and white photograph of a small town", "a 3d model of a table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a house with a roof and white brick wall.", " of a stone wall with a window and multiple stone arches."], 
"sample_ids": ["00915b83a52b45d498962d0cd42af491", "db74ee1621464be1b164be26a1af050e"], "properties": ["roof, wall, color", "window, arches, wall"], "captions_pred_pc": ["a black and white image of a rectangle with dots all over it", "a black and white illustration of a bolt and nut on a white background a black and white illustration of a bolt and nut on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small white house with a roof", "a 3d model of an old brick wall"], "question": "which wall is made of stone", "label": 1}, {"captions": [" a small house with a blue roof.", "a featuring a tree stump, mossy wood, leaves, and a rock with grass."], "sample_ids": ["fa21afd3a99d448cb23fa527a784769c", "2527cd763a1a43f9870eb65e44e79f7d"], "properties": ["roof, color, blue", "mossy, rock, grass"], "captions_pred_pc": ["a house made of dots on a white background a house made of dots on a white background royalty free illustration", "a black and white image of a person on a skateboard"], "captions_pred_image": ["a 3d model of a house with a porch and balcony royalty-free 3d model preview no.2", "a 3d model of a piece of wood on a white background"], "question": "which entity has a mossy rock?", "label": 1}, {"captions": ["a blue and white radio in the shape of a small suitcase.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["314cb57bed324d268c1205f5c7bf80ab", "6b745457e06840119058883b35f78f58"], "properties": ["color, shape, material", "roof, color, blue"], "captions_pred_pc": ["of a black and white drawing of a purse", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of an old-fashioned radio on a white background royalty-free 3d model preview no.2", "a 3d model of a house with a steeple on top"], "question": "which entity is made of wood", "label": 1}, {"captions": ["modern tan leather lounge chair.", " a destroyed car with rusted, broken metal and torn paper."], 
"sample_ids": ["409916a53a0d434599e2a3f52bfe9396", "3fe31c3bf5cd4574a8ca02222411a988"], "properties": ["color, tan, leather", "metal, rusted, paper"], "captions_pred_pc": ["a black and white illustration of an object in the shape of a butterfly on a white background 3d illustration of a black and white illustration of an object in the shape of a butterfly on a white background vector illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustr", "a black and white drawing of a person sitting in a chair"], "captions_pred_image": ["a gray leather lounge chair with a metal base", "a black and white image of a piece of debris on the ground"], "question": "which object is made of metal", "label": 1}, {"captions": [" a modern office building with a green door, green roof, windows, and blue lights.", " a red, blue, and green striped tower building."], "sample_ids": ["d6087023095446fbadef1721478373b2", "8668f9e9d1a64b86b31f260b8056cd19"], "properties": ["door, roof, window", "color, red, blue, green"], "captions_pred_pc": ["a black and white drawing of a toilet brush", "a black and white drawing of a butterfly on a white 
background a black and white drawing of a butterfly on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of an apartment building", "a 3d model of a pair of cylindrical towers with a staircase leading up to the top of one of the towers"], "question": "which building has a green roof", "label": 1}, {"captions": [" a house featuring a roof, floor plan, small bathroom, pool, and ceiling light fixtures.", " a house with a yard, trees, bushes, and surrounding buildings."], "sample_ids": ["6c6549e2975a48a1b59ebe2a6562900e", "7f8942ef51dd4246993a587a12df168c"], "properties": ["floor plan, bathroom, pool", "house, yard, surrounding buildings"], "captions_pred_pc": ["the floor plan of the house the floor plan of the house on a white background royalty free illustration", "a black and white image of a truck on a white background"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a house in the middle of a field"], "question": "which house has a yard", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", " a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond."], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "a452d5381dad4dc09f5ebe10635ae5fe"], "properties": ["color, door, window", "house, roof, green"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a 
black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "a 3d model of a building with a black roof"], "question": "which house has a green roof", "label": 1}, {"captions": [" of a blue pickup truck", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["b27e6039984b46deb9f907ddf2515a45", "c3a82df41875402285608ef13a55df57"], "properties": ["color is blue, make is ford, model is f150", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a pickup truck royalty free 3d model no 3", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" of a multi-colored spooling machine with wires and circuit board, featuring a lamp and shelf with toys.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["0e3f5cc16806492b948d41a748819ce3", "a17477b445b3443189dad22f768b888b"], "properties": ["color, shape, material", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white image of a decorative tile", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d rendering of an electronic device on a white surface", "a 3d model of a small building with a balcony"], "question": "which entity has a 
roof", "label": 1}, {"captions": ["a small clay jug with a face, handle, and spout, depicted as a .", " a small house with a blue roof, a door, and a pool."], "sample_ids": ["ceee98c20f23424195da092156905ec4", "40c52c2d278345c5b4e8d00a991271dc"], "properties": ["face, handle, spout", "door, roof, pool"], "captions_pred_pc": ["a black and white image of a small, circular object", "of a black and white photo of a window with fringes"], "captions_pred_image": ["a white ceramic vase with a face on it", "a 3d model of a small house"], "question": "which entity has a door", "label": 1}, {"captions": [" a building featuring graffiti, chinese writing, a door, a broken window, and an interior bathroom with a sink.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["1ee3df6f94ea4c329a9c5245634e34d5", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["graffiti, chinese writing, door", "hat, skull, bread"], "captions_pred_pc": ["a black and white illustration of a bridge with dots", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a black and white image of a bathroom with a sink and a toilet", "a black and white image of a stone sculpture"], "question": "which entity has a skull?", "label": 1}, {"captions": [" a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": ["f6c6e7a65a3e42dfa431b1d984c72f28", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["house, fence, dog", "hat, candy, strawberry"], "captions_pred_pc": ["above a black and white drawing of a bathroom", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 
3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a", "a black and white image of a person wearing a hat"], "question": "which entity has a floating cup?", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a small white house with stairs and a wall-mounted shelf."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "10c4ba5b0db4490db9c00c21c94cb41f"], "properties": ["yellow, table, roof", "house, color, white"], "captions_pred_pc": ["a black and white drawing of a floor plan", "above a black and white drawing of a bench"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a small white building"], "question": "which house is white", "label": 1}, {"captions": [" of a white chair with arms and legs.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["61ec56afad7a45deb99ccf1ab1bd2d73", "b896a0898efe4059a776193c02132129"], "properties": ["Arms, Legs, Color", "- material is stone, metal, concrete"], "captions_pred_pc": ["of a 3d rendering of a chair in the style of the 1920s and 1930s", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a white outdoor chair royalty 
free preview no 3", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" of a black flat screen lcd monitor on a stand.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["0cc63371c12344e892d1c7be5a9eb782", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["screen, monitor, stand", "lizard, rock, stuffed animal"], "captions_pred_pc": ["a black and white close-up of a television on a stand", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a computer monitor royalty free 3d model preview no. 2", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": ["a pair of yellow pliers.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["570d29f10e5b428b91da27cff52bac56", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["color, yellow, pliers", "- material is wood, rusty, horned"], "captions_pred_pc": ["of a pair of pliers on a white background vector illustration of a pair of pliers on a white background illustration", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a pair of scissors on a white background", "3d model of a plague doctor's mask"], "question": "which object is made of wood", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["- color is red, blue, pink", "- 
material is wood, rusty, horned"], "captions_pred_pc": ["for a black and white image of an object on a white background", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "3d model of a plague doctor's mask"], "question": "which object is made of wood", "label": 1}, {"captions": [" of a house with a pink roof.", " a two-story small apartment building with a roof."], "sample_ids": ["6162909df6294848a8eea83c3aa9585b", "8d1102e923954604ae7045a7ca14c1f6"], "properties": ["color, roof, pink", "two-story, roof, building"], "captions_pred_pc": ["a black and white drawing of the letter 'p' on a white background illustration", "a black and white pattern of dots in the shape of the letter c on a white background vector illustration of a black and white pattern of dots in the shape of the letter c on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house in the style of the 1920s and 1930s", "a 3d model of an apartment building royalty free 3d model preview no 2"], "question": "which building has a roof?", "label": 1}, {"captions": [" a building featuring a radio tower with a red antenna.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["15d6b41485754984b57ea7990faa53cd", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["building, tower, color", "table, staircase, light"], "captions_pred_pc": ["for a black square on a white background", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d model of a radio tower royalty free 3d model preview no 2", "a 3d model of a table with a staircase"], "question": "which object has a light?", "label": 1}, {"captions": [" a red, blue, and green striped tower building.", " a large metal building with a roof and truss structure."], "sample_ids": 
["8668f9e9d1a64b86b31f260b8056cd19", "b85a99699ccd4bcba213322113bb253d"], "properties": ["color, red, blue, green", "roof, truss, structure"], "captions_pred_pc": ["a black and white drawing of a butterfly on a white background a black and white drawing of a butterfly on a white background royalty free illustration", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a pair of cylindrical towers with a staircase leading up to the top of one of the towers", "a 3d model of a long metal fence"], "question": "which building has a roof and truss structure", "label": 1}, {"captions": [" a small house with a blue roof, a door, and a pool.", " a small building with a staircase in a room."], "sample_ids": ["40c52c2d278345c5b4e8d00a991271dc", "5a5b80af609a42acaebfdd086ae54336"], "properties": ["door, roof, pool", "room, staircase, building"], "captions_pred_pc": ["of a black and white photo of a window with fringes", "a black and white image of the letter l"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a staircase on a white surface"], "question": "which building has a staircase?", "label": 1}, {"captions": [" of a computer with a green screen, keyboard, and white box.", " a small white building featuring a room with a door and a white shelf."], "sample_ids": ["fb1e5a04ef4644f98219e1d5d52ab073", "d7b78fa9a6b64f6095b881bc619b04fe"], "properties": ["screen, keyboard, box", "room, door, shelf"], "captions_pred_pc": ["a box made up of many small dots on a white background a box made up of many small dots on a white background royalty free illustration", "above a black and white illustration of a person standing in front of a door"], "captions_pred_image": ["a 3d model of a vintage computer royalty-free 3d model preview", "a 3d model of an empty room"], "question": "which entity has a door?", "label": 1}, {"captions": ["a yellow gold ring with an engraved quote, \"strength and love.\"", "3d object featuring a blue and 
white tile, mattress, book with a sign, and a hanging notice sign, all representing god's family faith in action."], "sample_ids": ["c155767db07340f2813c1b3dfa8d63b9", "4b1d361630b949889282755fb6fb46bc"], "properties": ["ring, material, gold", "Object, color, white"], "captions_pred_pc": ["of a black bangle bracelet on a white background", "a black and white image of a line of dots on a white background"], "captions_pred_image": ["a white wedding band with the words strength in weakness engraved on it", "god's family faith in action"], "question": "which object is white", "label": 1}, {"captions": [" of a wooden cabinet with drawers and filing features.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["abbc90bbd5474f73b16482ccd10e07ec", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["Cabinet, Drawers, Filing", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d rendering of a white wooden chest of drawers", "a 3d rendering of a plastic box with several compartments"], "question": "which entity is made of plastic", "label": 1}, {"captions": [" a medieval stone castle with walls and stairs.", " a house with wooden framing and trusses."], "sample_ids": ["10bc1bbec4c045f9b15fc2156b5e32ee", "4501794e257c4a8ba60a94757d8e93a9"], "properties": ["wall, stairs, castle", "frame, trusses, wood"], "captions_pred_pc": ["a castle in the snow a castle in the snow illustration on a white background royalty free illustration", "a black and white drawing of a window"], "captions_pred_image": ["a 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of 
china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the", "a 3d model of a house under construction"], "question": "which building is made of wood", "label": 1}, {"captions": ["s of a rock, boat, plane, and leaf on a stick.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["be0884a7ced34b3d92687b6087798a1e", "a17477b445b3443189dad22f768b888b"], "properties": ["s, stick, leaf", "roof, pillar, stairs"], "captions_pred_pc": ["above a black and white drawing of an object floating in the sky", "a black and white image of a square with dots"], "captions_pred_image": ["a black and white photograph of a rock on a sandy surface", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": [" a mouse wearing a top hat and a teddy bear holding a spoon.", " a tan, cylindrical hat with a hole in it."], "sample_ids": ["887e410d07854396b563325ae1929583", "00c350e9f5f24fcd8bcc378da2963d4c"], "properties": ["hat, mouse, bear", "hat, color, tan"], "captions_pred_pc": ["a black and white illustration of a snowflake on a white background", "a black and white drawing of a mushroom on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a mouse wearing a top hat and bow tie", "a 3d model of an object with a hole in it"], "question": "which hat is tan", "label": 1}, {"captions": [" of a white plastic bar, resembling a shelf or towel rod, with a 
metal light fixture.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["f84dec547a3d4710b48db11fc9fa489a", "bf18bfd89efd43389781050230467d58"], "properties": ["light source, fixture, color", "Lights, number, five"], "captions_pred_pc": ["a black and white photo of a small square on a white background", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white object on a gray background", "a white chandelier with five white shades"], "question": "which light source is more powerful", "label": 1}, {"captions": [" a small white house with a staircase and a window.", " a large house with a roof on a platform."], "sample_ids": ["9eb88d17310d42dda9e17883e9922525", "cb3e09a301b746918a682a595037c7f7"], "properties": ["house, staircase, window", "roof, platform, house"], "captions_pred_pc": ["a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d rendering of a small room with a staircase", "a 3d model of a small house"], "question": "which house has a roof on a platform", "label": 1}, {"captions": ["a pink pixelated 3d pig model with black eyes and a handle.", " a gun with a blue handle."], "sample_ids": ["d9006ea4af304f3c9398339f9fc99fc3", "ad65fd36ff284655ab9331e2e8a5a8a5"], "properties": ["color, eye, handle", "color, handle, blue"], "captions_pred_pc": ["above a black and white drawing of a room", "a black and white image of a gun and a toothbrush"], "captions_pred_image": ["a 3d model of a small white box", "a 3d model of a gun on a gray background"], "question": "which object has a blue handle", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", " of an egyptian sarcophagus."], "sample_ids": 
["7907916e8f524df5b16eb566497db83e", "70aa484af2ab44149a608dd81a6ff459"], "properties": ["color, roof, tray", "sarcophagus, material, wood"], "captions_pred_pc": ["a black and white image of a metal object", "a black and white circular pattern on a white background"], "captions_pred_image": ["a 3d model of the roof of a building", "a black and white photograph of a sphere with egyptian hieroglyphics on it"], "question": "which object is made of wood", "label": 1}, {"captions": [" a building featuring yellow columns, a yellow roof, and a wooden structure.", " a building featuring yellow columns, a yellow roof, and a wooden structure."], "sample_ids": ["0ce6a4102f4f40e2a0084938b0a93941", "0ce6a4102f4f40e2a0084938b0a93941"], "properties": ["structure, columns, roof", "structure, columns, roof"], "captions_pred_pc": ["a black and white drawing of a window", "a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d model of a building with multiple levels"], "question": "which building has a wooden structure?", "label": 0}, {"captions": ["a featuring a tree stump, mossy wood, leaves, and a rock with grass.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["2527cd763a1a43f9870eb65e44e79f7d", "6b745457e06840119058883b35f78f58"], "properties": ["mossy, rock, grass", "roof, color, blue"], "captions_pred_pc": ["a black and white image of a person on a skateboard", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a piece of wood on a white background", "a 3d model of a house with a steeple on top"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" a small house with stairs and a roof.", " a house with a wooden-framed roof structure."], "sample_ids": ["e9305c80010f4e3b9de9789f01a9bee5", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["roof, stairs, house", "roof, material, wood"], "captions_pred_pc": ["above a black 
and white image of a square with dots all over it", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d rendering of a podium on a wooden floor", "a 3d model of a building with a roof"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a white and yellow metal shelving unit with a steel structure, yellow legs, and suspended ceiling system.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["c1a7d264b34841409009b3d5d39d5b99", "c3a82df41875402285608ef13a55df57"], "properties": ["Steel, Color, Yellow", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white illustration of a building", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a table with multiple tables and chairs", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a wooden cube.", " of a yellow metal locker with legs, wheels, and metal brackets."], "sample_ids": ["cf3ae5b8dad64e338c835d22947ce796", "e3fde8fe782c41f0b141c9f1b8e13aa5"], "properties": ["shape is cube, material is wood, color is brown", "metal, legs, wheels"], "captions_pred_pc": ["in 15 words or less a black and white pattern of dots on a white background a black and white pattern of dots on a white background royalty free illustration", "a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a 
black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white"], "captions_pred_image": ["a 3d model of a marble cube on a white background royalty free 3d model preview no.3", "a 3d model of an old metal locker"], "question": "which object is made of metal", "label": 1}, {"captions": [" a green electrical utility box with a warning sign and a small blue box on a concrete base.", " a large metal building with a roof and truss structure."], "sample_ids": ["8e39d4766ed4444ea527d6c5ea33a5ef", "b85a99699ccd4bcba213322113bb253d"], "properties": ["color, base, warning", "roof, truss, structure"], "captions_pred_pc": ["a black and white illustration of a box with dots on it", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of an electrical box royalty-free 3d model preview no.2", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog.", " a cartoon panda bear wearing a diaper."], "sample_ids": ["f6c6e7a65a3e42dfa431b1d984c72f28", "e2c307d9fa2b4d40b4602537d7f71e24"], "properties": ["house, fence, dog", "cartoon, bear, diaper"], "captions_pred_pc": ["above a black and white drawing of a bathroom", "a 3d model of a teddy bear on a white background 3d model of a teddy bear on a white background royalty free illustration"], 
"captions_pred_image": ["a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a", "a 3d model of a cute panda bear"], "question": "which entity is a cartoon?", "label": 1}, {"captions": [" of a plague mask with a rusty, horned, wooden helmet and a crow's head design.", " of a stone wall with a window and multiple stone arches."], "sample_ids": ["2b0896f810074399a5ae7d6dbab8c330", "db74ee1621464be1b164be26a1af050e"], "properties": ["- material is wood, rusty, horned", "window, arches, wall"], "captions_pred_pc": ["in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration", "a black and white illustration of a bolt and nut on a white background a black and white illustration of a bolt and nut on a white background royalty free illustration"], "captions_pred_image": ["3d model of a plague doctor's mask", "a 3d model of an old brick wall"], "question": "which entity is made of stone", "label": 1}, {"captions": ["a small yellow and green sphere resembling saturn with a hat.", "\"multiple white cubes arranged in a row on a gray background.\""], "sample_ids": ["6811a2f3d1154dccb37d534ae673e673", 
"17c8222d4ce04e518117078e7de6aaed"], "properties": ["color, shape, size", "color, background, white"], "captions_pred_pc": ["a black and white illustration of a dotted circle on a white background a black and white illustration of a dotted circle on a white background royalty free illustration", "a black and white image of a box with the words 'box 2' and 'thoughtful'"], "captions_pred_image": ["a 3d model of the planet saturn royalty free 3d model preview no.2", "an image of a white background with a few small cubes on it"], "question": "which entity is a sphere?", "label": 0}, {"captions": [" a tall glass tower featuring blue, green, and white squares.", " a metal-framed wall with red and blue bars in a steel structure."], "sample_ids": ["405d68eda26c401fbcddc7e3a457c74e", "fefc99453e2d4406a9668d5697224c0f"], "properties": ["color, shape, height", "color, red, blue, structure"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "a black and white image of a person holding a toothbrush"], "captions_pred_image": ["a black and white photograph of a metal sculpture on a pedestal", "a 3d rendering of a metal frame structure"], "question": "which structure is made of metal", "label": 1}, {"captions": [" a white rocking chair with a curved backrest.", "a 3d cartoon character of a boy wearing a cowboy hat and blue pants."], "sample_ids": ["ee0deb90abf943b6894cd5ded1331213", "e94fb39e384f47ce87cce1930851b8f8"], "properties": ["backrest, curved, yes", "hat, pants, shirt"], "captions_pred_pc": ["a black and white illustration of a spiral pattern on a white background a black and white illustration of a spiral pattern on a white background royalty free illustration", "a square made of dots on a white background a square made of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white chair royalty free 3d model no. 
3", "a 3d model of a cartoon character wearing a hat"], "question": "which entity has a hat?", "label": 1}, {"captions": [" a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light.", "a featuring a chair, a building, a glass tower, and a throne made of money, with various unique elements such as a green roof and a computer chip."], "sample_ids": ["5ea0962b100b4fccb761ed84afe027b5", "18d2e75f23474d7489a6d7d605dfc76d"], "properties": ["house, table, chair", "throne, chair, building"], "captions_pred_pc": ["above a black and white photograph of an open door", "a black and white illustration of a person sitting on a bench"], "captions_pred_image": ["a 3d rendering of a small white table with a chair", "a 3d model of a building on top of a table"], "question": "which entity has a throne made of money", "label": 1}, {"captions": ["a 3d pink spiky spherical flower.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["039a3fc74e39450883c46acbe2f57476", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["color, shape, texture", "roof, color, yellow"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background", "a black and white drawing of a room"], "captions_pred_image": ["a 3d model of a snowflake on a white background", "a 3d model of a table and chairs on a white background"], "question": "which entity has a roof that is yellow", "label": 1}, {"captions": [" a large, black and white circular building, resembling a stadium or ring structure.", " a large white and metal building with a metal roof structure."], "sample_ids": ["67f46bb0048244c687a58d1017a08f6b", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["building, color, black and white", "roof, metal, white"], "captions_pred_pc": ["the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white 
background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustr", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a circular fence with black and white stripes", "a 3d model of a large white box"], "question": "which building is white", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", " a small wooden house with a green roof."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["color, material, structure", "roof, color, green"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and 
chairs", "a 3d model of a house with a ladder"], "question": "which structure is made of wood", "label": 1}, {"captions": [" a futuristic white sports racing car.", " of a yellow metal locker with legs, wheels, and metal brackets."], "sample_ids": ["95f5d1518f1b4a638f4bc5e444a7e1a1", "e3fde8fe782c41f0b141c9f1b8e13aa5"], "properties": ["color, white, futuristic", "metal, legs, wheels"], "captions_pred_pc": ["| all images person 2019 3d model by person | person 2019 3d model by person", "a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white"], "captions_pred_image": ["a 3d model of a futuristic sports car", "a 3d model of an old metal locker"], "question": "which object has wheels", "label": 1}, {"captions": ["an orange and white striped rocket model.", " a small house with a tree and a tree stump, featuring a roof and a square ceiling 
with a hole."], "sample_ids": ["9f19d5d47d174d3382c7dc31aaf22f0b", "b16fb21cda9a4a21a024df749c2304f4"], "properties": ["color, orange, white", "roof, ceiling, hole"], "captions_pred_pc": ["a black and white drawing of a tree on a white background vector illustration of a black and white drawing of a tree on a white background, isolated on a white background illustration of a black and white drawing of a tree on a white background, isolated on a white background illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a", "a black and white image of a square with dots on it"], "captions_pred_image": ["a 3d model of a rocket on a gray background", "a 3d model of a small house and a tree in the foreground"], "question": "which entity has a roof", "label": 1}, {"captions": [" a staircase with a glass railing and a small white table, featuring a ceiling light.", " a house with a yard, trees, bushes, and surrounding buildings."], "sample_ids": ["10b899daca25493cba6bfffbbe7990fe", "7f8942ef51dd4246993a587a12df168c"], "properties": ["railing, glass, table", "house, yard, surrounding 
buildings"], "captions_pred_pc": ["above a black and white photograph of a cell phone", "a black and white image of a truck on a white background"], "captions_pred_image": ["a 3d rendering of a staircase with a glass railing", "a 3d model of a house in the middle of a field"], "question": "which entity has a yard", "label": 1}, {"captions": [" a destroyed house and plane amidst a town with buildings.", " a staircase with a railing, table, and chair, featuring a square ceiling light."], "sample_ids": ["0fd3ddca09194b8f94ef731af3b64a08", "51a0fba79bce472a8b827b78a110b77f"], "properties": ["house, plane, town", "stair, table, chair"], "captions_pred_pc": ["above a black and white drawing of a piece of paper", "for a black and white image of a toilet paper holder"], "captions_pred_image": ["a 3d model of a damaged building", "a 3d model of a staircase in a room"], "question": "which entity has a table and chair?", "label": 1}, {"captions": ["a featuring a small island with a mountain, a lake in the middle, and a bird flying above.", " a tree with a blue ball and a mushroom."], "sample_ids": ["e51fead89ae64968b2ca7f4ccb6d3b97", "4bb8528bd042471f8865cce122a03924"], "properties": ["a, bird, lake", "a, ball, mushroom"], "captions_pred_pc": ["a black and white photograph of a cloudy sky", "a black and white illustration of an airplane flying over a tree"], "captions_pred_image": ["a 3d model of a mountain range in black and white", "a 3d model of a tree with a ball on top of it royalty free 3d model preview no.2"], "question": "which entity has a ball and a mushroom?", "label": 1}, {"captions": [" a small house with a pond and situated on a rock.", " a house with a green, wooden-structured roof."], "sample_ids": ["92859eb82a344134806b37cc209927c6", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["house, rock, pond", "roof, color, green"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a toaster", "a black and white image of the letter 'l' made up 
of dots on a white background"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a house with a triangular roof"], "question": "which house has a green roof", "label": 1}, {"captions": [" a white and black horned demon with bunny features.", "a silver ring with a swirly design and a white 3d printed sphere."], "sample_ids": ["1d3537d1341a423f89990e9b06924904", "8d81b384b5cc4f46a1779d0a2f5f7e27"], "properties": ["color, horns, features", "color, silver, white"], "captions_pred_pc": ["above a black and white image of a pair of headphones", "a black and white illustration of a circle with dots"], "captions_pred_image": ["a 3d model of a skull with horns on its head", "a 3d model of a silver ball on a gray background"], "question": "which entity is not a ring?", "label": 0}, {"captions": [" a yellow and white pickup truck with yellow wheels.", " a destroyed car with rusted, broken metal and torn paper."], "sample_ids": ["6f6861c416be4a59ac201f8cb019c349", "3fe31c3bf5cd4574a8ca02222411a988"], "properties": ["color, yellow, white, wheels", "metal, rusted, paper"], "captions_pred_pc": ["a car made of dots on a white background a car made of dots on a white background stock illustration \u00a9 iStock/Getty Images", "a black and white drawing of a person sitting in a chair"], "captions_pred_image": ["a 3d model of a pickup truck royalty-free 3d model preview no.2", "a black and white image of a piece of debris on the ground"], "question": "which entity is more likely to have rusted metal", "label": 1}, {"captions": [" a small white building featuring a room with a door and a white shelf.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["d7b78fa9a6b64f6095b881bc619b04fe", "c3a82df41875402285608ef13a55df57"], "properties": ["room, door, shelf", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["above a black and white illustration of a person standing in front of a 
door", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of an empty room", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a house featuring furniture, people, a staircase, and torn-apart sections.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["85335d3349894b5884dbf6f3e7d68fcc", "a17477b445b3443189dad22f768b888b"], "properties": ["furniture, staircase, torn-apart", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white image of a pixelated cityscape on a white background royalty free illustration", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a damaged room with broken furniture and debris on the floor", "a 3d model of a small building with a balcony"], "question": "which house has a roof", "label": 1}, {"captions": [" an ice cream machine, popsicle with two sticks, and a shelf with a computer monitor.", " a house with a flat roof structure."], "sample_ids": ["cb840159fea7436d81eb33bdccad3596", "abc52d210d71415296730bb00352ce6f"], "properties": ["A, a, a", "roof, flat, structure"], "captions_pred_pc": ["a black and white illustration of a bench", "a black and white drawing of a window with dots around it"], "captions_pred_image": ["a 3d rendering of a white and gray wall mounted shelf", "a 3d model of a house with a roof"], "question": "which structure has a flat roof", "label": 1}, {"captions": ["a white 3d object featuring black and white patterns, resembling a combination of a dish, smoke detector, cake, bowl, alarm clock, and ceiling light.", "a white 3d printed mickey mouse dice with various numbers and symbols on it."], "sample_ids": ["6a8cc820f00a4cfc954d56e2b1f6206a", "e2645ac544844f3c981203134a99c30c"], "properties": ["- material is plastic- color is white- shape is cylinder", "- material is plastic- shape is dice- color is white"], "captions_pred_pc": ["in 15 words or less a 
black and white drawing of a plate", "a circle of dots with the number 2 in the center"], "captions_pred_image": ["a cake with a black and white design on the top of the cake", "a 3d printed white dice with a mickey mouse face"], "question": "which object is whiter", "label": 1}, {"captions": [" a large white and metal building with a metal roof structure.", "star wars stormtrooper "], "sample_ids": ["0ee2683270b1486991f9b9ef12990a78", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["roof, metal, white", "a, color, white"], "captions_pred_pc": ["of a lamp with a black shade on a white background", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a 3d model of a large white box", "a 3d model of a star wars stormtrooper"], "question": "which is not a white color", "label": 0}, {"captions": [" a long row of steel shelves in a warehouse, featuring a suspended scaffolding system.", "a green and red toy gun with a scope and yellow accents."], "sample_ids": ["578fe7a7bd754b889be33aea99cf5050", "4b7263b58a6647c3a03226b39c42108a"], "properties": ["a, material, steel", "color, red, green, yellow"], "captions_pred_pc": ["above a black and white image of a rack with multiple shelves", "a black and white image of a toothbrush"], "captions_pred_image": ["a 3d model of a large metal structure", "a 3d model of an assault rifle on a gray background"], "question": "which object is made of red", "label": 1}, {"captions": [" a table with two bowls and a cup on it.", " featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot."], "sample_ids": ["41842c8d2ebd402da04def3c53c41633", "34ebe81ae93841ca829efd15aee4d8c1"], "properties": ["a, bowl, cup", "moss, mushroom, grass"], "captions_pred_pc": ["a black and white image of two eyes in the sky", "for a black and white illustration of a cloud on a white background"], "captions_pred_image": ["a 3d model of a table on a marble floor royalty free 3d model preview no. 
3", "a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor"], "question": "which entity has a brown spot?", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " a house with a roof and beams."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["apse, roof, floor plan", "roof, beams, house"], "captions_pred_pc": ["a black and white drawing of a room", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a building with a roof"], "question": "which entity has a roof with beams", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["roof, green, lawn", "roof, color, yellow"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "a black and white drawing of a room"], "captions_pred_image": ["a 3d model of a small 
apartment building royalty free 3d model preview no.2", "a 3d model of a table and chairs on a white background"], "question": "which roof is yellow", "label": 1}, {"captions": ["smiley-faced banana .", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["cc0099a687194a31a052ac761f5fdfea", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["face is smiling, banana is yellow, smiley face is a sticker", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["above a black and white image of a surfboard on a white background", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d model of a banana with a smiling face", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a featuring a train, large ship, long metal pipe, boat, black map of kenya, and a black piece of plastic.", " a wooden staircase with a railing and table."], "sample_ids": ["49c5a9d42ba64fb2b681ef583d700b98", "956247bea850458199c651037d4b1d7f"], "properties": ["a train, a ship, a boat", "railing, table, staircase"], "captions_pred_pc": ["above a black and white image of a long, curved line on a white background", "above a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a submarine", "a 3d model of a table with a staircase"], "question": "which entity has a table?", "label": 1}, {"captions": [" a colorful painting featuring a sad buddha and an angel.", "a white of a man with arms outstretched."], "sample_ids": ["c8d5067bcfdb41a99b103cc0addbb0cb", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["color, image, subject", "image, color, white"], "captions_pred_pc": ["above a black and white drawing of a square with small black dots", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a black 
and white drawing of an angel holding a baby", "a 3d model of a man with his arms outstretched"], "question": "which image is black and white?", "label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "09f2cf267e954c958828325067bcc36a"], "properties": ["color, light, jewels", "island, terrain, rocks"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background royalty free illustration", "above a black and white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a black and white image of a piece of dirt on the ground"], "question": "which entity has more rocks", "label": 1}, {"captions": [" of a small island featuring a white lighthouse, a fountain, and a grassy crater.", " of a torn piece of paper, a hole in a wall, and a guitar with a sign on it, accompanied by a piece of metal, wood, and a large rock."], "sample_ids": ["2a30e69498ff4fd1a33c1fb72286f553", "cc4ccf85d4c1425cb5975b8b5664d38a"], "properties": ["lighthouse, fountain, crater", "paper, hole, sign"], "captions_pred_pc": ["a black beanie with sparkles on a white background", "a silhouette of a map of the state of karnataka, india on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of an object on top of a pedestal", "an image of a torn piece of paper in the shape of a map"], "question": "which entity has a sign on it", "label": 1}, {"captions": [" a futuristic space station featuring a bench, computer desk with a laptop, small coffee machine, printer, and computer monitor.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["9b8e2f9070b24956a343a01a5fabdf03", 
"795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["computer desk, laptop, monitor", "house, pool, balcony"], "captions_pred_pc": ["a black and white silhouette of a traditional japanese gate", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a futuristic room with a bench", "a 3d model of a modern house"], "question": "which entity has a balcony?", "label": 1}, {"captions": ["a pink pixelated 3d pig model with black eyes and a handle.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["d9006ea4af304f3c9398339f9fc99fc3", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["color, eye, handle", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["above a black and white drawing of a room", "a black and white image of a circular object on a white background"], "captions_pred_image": ["a 3d model of a small white box", "a 3d model of a black and white object on a gray background"], "question": "which object has a handle", "label": 1}, {"captions": ["a small clay jug with a face, handle, and spout, depicted as a .", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken window."], "sample_ids": ["ceee98c20f23424195da092156905ec4", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["face, handle, spout", "room, furniture, window"], "captions_pred_pc": ["a black and white image of a small, circular object", "above a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a white ceramic vase with a face on it", "a 3d image of a room with a person in it"], "question": "which entity has a kitchen", "label": 1}, {"captions": [" a wooden door with a lock, handle, and a piece of paper on it.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["78762b19b7dc4823a0033ec63f092ca5", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": 
["door, lock, handle", "a, material, clay"], "captions_pred_pc": ["in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a black and white image of a door with a crack in it", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue arrow, exosphere label, england label, blue and purple stripes, and a blue flag.", " a potted christmas pine tree."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "460c8f3034a844159826fac3b8aa35a5"], "properties": ["color, temperature, england", "a, color, green"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "a black and white illustration of a snowflake on a white background"], "captions_pred_image": ["a 3d model of the earth with a rainbow in the sky", "a 3d model of a christmas tree in a vase"], "question": "which entity is not green?", "label": 0}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a pink, ear-shaped object."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "0e08d777c7b948a784dc15748e7b173f"], "properties": ["color, shape, and size", "shape is ear, color is pink, material is plastic"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a black and white illustration of a rock on a white background"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a white object on a gray background"], "question": "which object is pink", "label": 1}, {"captions": [" of a white sculpture, resembling a horse and paper plane, on a gray background.", "\"multiple white cubes arranged in a row on a gray background.\""], "sample_ids": ["179b4438edfc4a43a27a83784f38ff4b", 
"17c8222d4ce04e518117078e7de6aaed"], "properties": ["color, background, white", "color, background, white"], "captions_pred_pc": ["above a black and white image of a sculpture in the shape of a bird", "a black and white image of a box with the words 'box 2' and 'thoughtful'"], "captions_pred_image": ["a 3d printed sculpture of a horse's head on a gray background", "an image of a white background with a few small cubes on it"], "question": "which object is white", "label": 0}, {"captions": ["a white of a building with columns, stairs, and railings.", " a small building with windows and a roof."], "sample_ids": ["c9ad30f336844b629cb237fa5b0d94f2", "0ef2cac27e364c0687afae7ab5040cc3"], "properties": ["image, building, stairs", "roof, windows, building"], "captions_pred_pc": ["a black and white image of dots on a white background a black and white image of dots on a white background royalty free illustration", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a 3d model of a multi-level building with stairs and balconies royalty-free 3d model no.", "a 3d model of an apartment building royalty free 3d model preview no 3"], "question": "which building has a roof?", "label": 1}, {"captions": ["a white 3d object featuring black and white patterns, resembling a combination of a dish, smoke detector, cake, bowl, alarm clock, and ceiling light.", " a house with a roof and beams."], "sample_ids": ["6a8cc820f00a4cfc954d56e2b1f6206a", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["- material is plastic- color is white- shape is cylinder", "roof, beams, house"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a plate", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a cake with a black and white design on the top of the cake", "a 3d model of a building with a roof"], "question": "which entity is a house?", "label": 1}, {"captions": [" 
a room featuring a table and chairs, with blue and green walls.", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], "sample_ids": ["a49899d9a6194583b745e02f3654841e", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["color, table, chairs", "building, plane, room"], "captions_pred_pc": ["a close-up of a white object on a white background", "a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a room with a white wall", "a 3d model of a box with a lot of items inside"], "question": "which entity has a room with a computer?", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["yellow, table, roof", "water, boat, rock"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white illustration of a surfboard"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d image of an animal laying on the ground"], "question": "which entity has a boat in it?", "label": 1}, {"captions": [" a black and white striped box.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["00fa8accaaad44c780efe0c04ed4a12b", "a17477b445b3443189dad22f768b888b"], "properties": ["color, black, white", "roof, pillar, stairs"], "captions_pred_pc": ["in 15 words or less a black and white pattern on a white background", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d image of a black and white striped surface", "a 3d model of a 
small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a castle on an island with a small floating house, trees, and clouds."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "c4c09479570943e2845fbd4c6a450568"], "properties": ["yellow, table, roof", "castle, island, house"], "captions_pred_pc": ["a black and white drawing of a floor plan", "above a black and white illustration of a group of dots in the shape of a circle"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a small house on an island"], "question": "which entity has a castle on an island?", "label": 1}, {"captions": [" of a japanese-style pagoda house in a pixelated village.", " a large, ancient stone building, resembling a roman structure and a medieval castle."], "sample_ids": ["42c4e6ca4a0c4b7b9a97d543b2442222", "f96abfafd34040a4bb09f5e2973403e9"], "properties": ["image is a japanese-style pagoda house in a pixelated village", "building, material, stone"], "captions_pred_pc": ["a black and white drawing of a square on a white background stock illustration \u00a9 2019 iStock", "a black and white drawing of the letter 'l' on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a japanese temple and pagoda", "3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 
3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosse"], "question": "which building is made of stone", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " a wooden staircase with a railing and table."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "956247bea850458199c651037d4b1d7f"], "properties": ["- color is red, blue, pink", "railing, table, staircase"], "captions_pred_pc": ["for a black and white image of an object on a white background", "above a black and white drawing of a window"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a 3d model of a table with a staircase"], "question": "which object has a table?", "label": 1}, {"captions": [" a leather recliner chair and ottoman set, featuring swivel functionality and available in modern orange, tan, and brown colors.", "a white of a woman with her arms outstretched."], "sample_ids": ["44be138ae8e2409bbbca44a96fc67d45", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["color, tan, brown, orange", "image, color, white"], "captions_pred_pc": ["above a black and white illustration of an office chair", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a grey leather lounge chair with ottoman and footstool", "a 3d model of a woman with her arms outstretched"], "question": "which entity is not a color image?", "label": 1}, {"captions": [" a diverse town featuring houses, buildings, people, animals, and desert elements.", " a wooden table with a plant on it."], "sample_ids": ["436d6492fa06466680ecc82e5e07a7a0", "3170a8fc0ebf4d71ab19c723be68987f"], "properties": ["house, building, people", "table, plant, wood"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench", "a 3d model of the molecule on a white 
background a 3d model of the molecule on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small town in the middle of a field", "a side table with a potted plant sitting on top of it"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a white teddy bear, cloud, skull, octopus, and bird in various positions on a gray background.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["dd5849aced0443b1b4b38d413f7e06c4", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["background, color, white", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white image of a cat's head", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of an animal skull in white on a gray background", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a witch wearing a black and white hat.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["1516bf87d6ee47fa9ede71bf77757b29", "06a1c233fb444830b577aa06e2c01294"], "properties": ["color, black, white", "house, tree, hill"], "captions_pred_pc": ["a black and white illustration of a person wearing a wizard's hat and standing next to a snowflake", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of a witch wearing a hat and carrying a broom", "a black and white image of a house in the middle of a field"], "question": "which entity has more trees", "label": 1}, {"captions": [" a white car alternator, motor, and fan with its parts.", "a featuring a fireplace with a broom, shovel, and other items, a man and a dog, a table with a bucket, and a vase with a pipe."], "sample_ids": ["4f3258f5264b46b1b839d992ca1bad68", "fdcbb46224a44faca5c3fb20264eb2e7"], "properties": ["color, alternator, 
fan", "broom, shovel, fireplace"], "captions_pred_pc": ["a black and white image of an object on a white background", "a close-up view of a white plastic bag with a small hole in it"], "captions_pred_image": ["a 3d model of a fan and its components on a white background", "a 3d model of an outdoor fireplace"], "question": "which entity has a fireplace?", "label": 1}, {"captions": [" a building with blue lines and structure.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["3bf337b699664ca0adf0817962d58718", "a2354f13774340d392fbf33564934aab"], "properties": ["color, shape, structure", "building, roof, yellow"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white image of a cell phone"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d rendering of a machine with a conveyor belt"], "question": "which building has a yellow roof", "label": 1}, {"captions": [" a yellow and white structure featuring lamp posts, bridge, poles, pier, stairway, and hanging rods.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["34257a26ad2e4c6d91ef6d5cd4bd7c43", "4a889132cc444d10bfcbf6c760984416"], "properties": ["color, pier, stairway", "a, color, white"], "captions_pred_pc": ["above a black and white illustration of an open door", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d model of a long bench on a white background royalty free 3d model no.", "a 3d model of a desk and chair"], "question": "which entity has a pier?", "label": 0}, {"captions": [" of a green bush with tree-like leaves.", " of a white building with a small house and a desk with a laptop."], "sample_ids": ["cb91cb6149a142a8a196a268dcf36aa8", 
"9244a2d3a9e94c8398ef991f1661bb58"], "properties": ["leaf, color, shape", "a, desk, laptop"], "captions_pred_pc": ["a black and white dots on a white background a black and white dots on a white background stock illustration", "a black and white image of a piece of furniture"], "captions_pred_image": ["a 360 degree view of a bushy plant on a gray background royalty free 360 degree view of a bushy plant on a gray background", "a 3d model of an office desk on a white background"], "question": "which entity has a desk with a laptop", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", " a small house with stairs and a roof."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "e9305c80010f4e3b9de9789f01a9bee5"], "properties": ["color, material, structure", "roof, stairs, house"], "captions_pred_pc": ["a black and white drawing of a room with dots", "above a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d rendering of a podium on a wooden floor"], "question": "which structure has a roof", "label": 1}, {"captions": [" a small white gravestone on a grassy field.", " of two rocks with ice elements."], "sample_ids": ["b43779ae94f74bffba4f29863518f506", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["location, field, gravestone", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white illustration of a whale", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white 
background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake on a white background 3d model of a snowflake", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": ["a white of a gun.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["d7c12235efd1471db5b7145b63dbd11a", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["color, white, gun", "throne, stairs, tree"], "captions_pred_pc": ["of a white object on a white background", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a white 3d model of a rifle on a gray background", "a 3d model of a throne with a tree on it"], "question": "which object has more stairs", "label": 1}, {"captions": [" of a large black mat with square grid design.", " of a stone fountain with a lion statue, surrounded by an archway inspired by the arch of triumph."], "sample_ids": ["72aac2e9ccd7482eb88e5e4bc204fbf3", "a72700696c3b44ef8101d1e71e914bc9"], "properties": ["size, color, design", "lion, statue, fountain"], "captions_pred_pc": ["a black and white pattern on a white background", "a black and white image of a metal object"], "captions_pred_image": ["a 3d rendering of a black rubber mat on a gray surface", "a 3d image of a lion statue on a wall"], "question": "which is not a fountain", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white 
face mask, a white helmet, a white and black earphone, and a black and white chair.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["color, white, black, white", "- material is wood, rusty, horned"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "3d model of a plague doctor's mask"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a city featuring various buildings, including one with a white roof and numerous white cubes, as well as a plane.", " a house featuring a pitched roof structure with brick detailing."], "sample_ids": ["a3c50635c2a04e548e57d4f027899131", "5fbd274f897b44fcafa02ee84228debf"], "properties": ["building, roof, white, cubes", "structure, roof, pitch"], "captions_pred_pc": ["above a black and white image of a map", "a black and white illustration of a square with a lot of dots on it"], "captions_pred_image": ["a 3d model of a city on a white background", "a 3d model of the roof of a house"], "question": "which building has a pitched roof", "label": 1}, {"captions": ["a green and red toy gun with a scope and yellow accents.", " a row of houses featuring roof structures with green roofs and tiled roof slats."], "sample_ids": ["4b7263b58a6647c3a03226b39c42108a", "aef9b23a78a7450286a961cc13448d00"], "properties": ["color, red, green, yellow", "roof, green, tiled"], "captions_pred_pc": ["a black and white image of a toothbrush", "of a black and white photo of a decorative metal wall hanging"], "captions_pred_image": ["a 3d model of an assault rifle on a gray background", "a 3d model of a set of stainless steel shelves"], 
"question": "which entity has a green roof", "label": 1}, {"captions": [" a house featuring a purple roof and a suspended ceiling with a light fixture.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["579b43057ef74bd08d06bdd3e8d973a0", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["roof, purple, suspended", "house, table, chair"], "captions_pred_pc": ["a black and white image of a piece of paper with a pattern of dots on it", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of the roof of a building royalty free 3d model no.", "a 3d rendering of a small white table with a chair"], "question": "which house has a table and chair?", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", " a small house on an island with a boat and a bird on a rock."], "sample_ids": ["7907916e8f524df5b16eb566497db83e", "1e56a92a0ddc41e59694bd1ad1656149"], "properties": ["color, roof, tray", "house, rock, bird"], "captions_pred_pc": ["a black and white image of a metal object", "a black and white drawing of a boat in the middle of a body of water"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d rendering of a house on a rock"], "question": "which house has a bird on a rock?", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "c3a82df41875402285608ef13a55df57"], "properties": ["a knife, blade, handle", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a piece of white plastic on a gray background", "a white plastic object on a gray background"], 
"question": "which object is made of plastic", "label": 1}, {"captions": [" a vibrant city skyline featuring various colored buildings, trees, and skyscrapers.", " a small house with a roof and ceiling-mounted air conditioner."], "sample_ids": ["1a1fb9b0d83845f6b1238fb45e0defff", "6965067ea9e34357a7af21a2f078fbac"], "properties": ["color, skyline, buildings", "roof, air conditioner, house"], "captions_pred_pc": ["a black and white illustration of a city skyline", "a black and white illustration of a window"], "captions_pred_image": ["a black and white 3d model of a city skyline", "a 3d rendering of a small house with a covered porch"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a white supreme logo t-shirt, low poly design.", "a white of a city landscape with various objects and a ceiling light."], "sample_ids": ["bea8441c08d94366b96b53775391d8e6", "770e577c9795435898209fd24191635b"], "properties": ["color, white, logo", "image, color, light"], "captions_pred_pc": ["for a black and white image of a shirt with dots", "a black and white drawing of a window in the shape of a square on a white background a black and white drawing of a window in the shape of a square on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white t-shirt with a supreme logo", "a white 3d model of a small town"], "question": "which image is white", "label": 1}, {"captions": [" a house with a green, wooden-structured roof.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["d58bb529b1434809a64b62f1b2899c3f", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["roof, color, green", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white image of the letter 'l' made up of dots on a white background", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a house with a triangular roof", "a white 3d model of a city 
skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": ["a featuring a sailboat with a hook, a bird suspended in the air, and a pair of scissors.", " a stack of books, a pile of paper, and a lamp with a black and white shade."], "sample_ids": ["f57ae66555d34349aeadc38b33f8f267", "6a06b505bcb34026a07ac15931f9f6f3"], "properties": ["a, bird, hook", "books, paper, lamp"], "captions_pred_pc": ["of a 3d scan of a person's torso and limbs", "a black and white silhouette of a map of the state of new york"], "captions_pred_image": ["a black and white photo of a kite flying in the sky", "a black and white photograph of a toilet paper holder"], "question": "which entity has more paper", "label": 1}, {"captions": ["white candle with a yellow flame in a candle holder.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["c3d85aaabddf4585b2468a5bca9f51d5", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, flame, white", "a, material, clay"], "captions_pred_pc": ["a black and white image of a dotted circle on a white background a black and white image of a dotted circle on a white background royalty free illustration", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a white candle in the shape of a teapot", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " a building with a purple, glass roof and a suspended ceiling featuring beams."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "a54c746bb2644e3ea4e53ee65e32df64"], "properties": ["roof, color, blue", "roof, glass, purple, ceiling, beams"], "captions_pred_pc": ["of a white lace clutch purse on a white background", "the letter 't' is made up of tiny white dots on a white background"], "captions_pred_image": ["a 3d model of a building with many windows", "a 3d model of a building with a roof"], 
"question": "which roof is made of glass", "label": 1}, {"captions": ["a collection of white s featuring a person holding a bat, riding a horse, holding a sword, riding a skateboard, as a batman character, and holding a baseball bat.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["ce4453db136e4e9db2ca0f86814059b4", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["s, batman, batman character", "Wheels, laptop, robot"], "captions_pred_pc": ["above a black and white illustration of a man in a batman costume", "a black and white drawing of a cell phone"], "captions_pred_image": ["a 3d model of a superhero with a sword in his hand royalty free 3d model preview no.2", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" a cartoon character wearing a hat, holding a bag, a baseball bat, and a blanket, resembling a toy animal.", " a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog."], "sample_ids": ["7e7272f3ddc24551905eccb63f3da42e", "f6c6e7a65a3e42dfa431b1d984c72f28"], "properties": ["hat, bag, blanket", "house, fence, dog"], "captions_pred_pc": ["a black and white illustration of a cell with dots", "above a black and white drawing of a bathroom"], "captions_pred_image": ["a 3d printed sculpture of a baseball player holding a bat", "a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d 
model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a"], "question": "which entity has a fence", "label": 1}, {"captions": [" a set of yellow and black tools, including a magnifying glass, hexagonal keys, and screwdrivers.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["40285a60f32749a8ae38957c7b073fe8", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["color, yellow, black", "mountainous, landmass, state"], "captions_pred_pc": ["a set of three black and white keychains on a white background", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d model of a pair of hexagonal keys and a pair of hexagonal scissors royalty free 3d model preview no 3", "a 3d model of a piece of paper"], "question": "which entity is not a state?", "label": 1}, {"captions": [" featuring a chair, table, and refrigerator.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["chair, table, refrigerator", "hat, candy, strawberry"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "a black and white image of a person wearing a hat"], "question": "which entity has a floating cup?", "label": 1}, {"captions": ["a 3d green toy dinosaur with a purple hat, horn, and accents, featuring a purple flower.", " a woman in a red dress holding a tennis racket, wearing a hat."], "sample_ids": ["f8cfe5430a8a4431aacf32f49b20220d", "b89b19ddadd04d6799e90b611c889bae"], "properties": ["color, hat, flower", 
"hat, dress, racket"], "captions_pred_pc": ["for a 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the human liver 3d illustration of the", "a black and white illustration of a dendritic cell"], "captions_pred_image": ["a 3d model of a purse on a white background royalty-free 3d model preview no. 
3", "a black and white photograph of a woman holding a tennis racket"], "question": "which entity has a hat", "label": 1}, {"captions": ["a small yellow and green sphere resembling saturn with a hat.", "star wars stormtrooper "], "sample_ids": ["6811a2f3d1154dccb37d534ae673e673", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["color, shape, size", "a, color, white"], "captions_pred_pc": ["a black and white illustration of a dotted circle on a white background a black and white illustration of a dotted circle on a white background royalty free illustration", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a 3d model of the planet saturn royalty free 3d model preview no.2", "a 3d model of a star wars stormtrooper"], "question": "which entity is white", "label": 1}, {"captions": ["s of a rock, boat, plane, and leaf on a stick.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["be0884a7ced34b3d92687b6087798a1e", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["s, stick, leaf", "Wheels, laptop, robot"], "captions_pred_pc": ["above a black and white drawing of an object floating in the sky", "a black and white drawing of a cell phone"], "captions_pred_image": ["a black and white photograph of a rock on a sandy surface", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " a house with a green, wooden-structured roof."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "roof, color, green"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "a black and white image of the letter 'l' made up of dots on a white background"], 
"captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d", "a 3d model of a house with a triangular roof"], "question": "which entity is a building?", "label": 1}, {"captions": ["3d superman logo in red and yellow", " a toy motorcycle, car, and robot on an orange platform."], "sample_ids": ["2a08f2f254934c1aabe73021453bc828", "7407a108e0354925b83b750339bc03df"], "properties": ["color, red, yellow", "platform, color, orange"], "captions_pred_pc": ["in 15 words or less a superman logo made out of dots", "a black and white illustration of a bicycle"], "captions_pred_image": ["a 3d model of the superman logo", "a 3d model of a motorcycle on a pedestal"], "question": "which entity has a platform that is orange?", "label": 1}, {"captions": ["a 3d wooden pole with a metal spear handle.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["1d5cf234576e41f0ba209c5e19d2db47", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["- material is wood, metal, metal", "- material is wood, rusty, horned"], "captions_pred_pc": ["of a black and white illustration of a vase on a pedestal", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of an old-fashioned pitchfork", "3d model of a plague doctor's mask"], "question": "which object is made of wood", "label": 1}, {"captions": [" a 
white and black horned demon with bunny features.", " a tree with green leaves."], "sample_ids": ["1d3537d1341a423f89990e9b06924904", "1a902b67735845198efb1b84f434b580"], "properties": ["color, horns, features", "leaf, color, green"], "captions_pred_pc": ["above a black and white image of a pair of headphones", "for a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a skull with horns on its head", "a 3d model of a tree with a lot of foliage"], "question": "which entity has leaves that are green?", "label": 1}, {"captions": ["a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars.", " of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom."], "sample_ids": ["1b3945962a4b4cda9fe939dc5d63e789", "f178fb523ad7421aaa90a92ee736ee00"], "properties": ["a room, a cake, a table", "bedroom, bathroom, bed"], "captions_pred_pc": ["a black and white illustration of an object on a white background", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d rendering of a white room with various items in it", "a 3d model of a small room with a bed, desk, and chair"], "question": "which entity has a bathroom?", "label": 1}, {"captions": ["a small white 3d boat model with a curved wing and knife.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["cafc467aff2643e8b70149c4944263ee", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["wing, knife, boat", "house, pool, balcony"], "captions_pred_pc": ["a black umbrella on a white background", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a rowing boat royalty free 3d model preview no 3", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": [" of a small 
wooden house with two roofs.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["30fc23ae4edb42609e30e029dede54bd", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["house, roof, wooden", "house, roof, blue"], "captions_pred_pc": ["of a pair of stainless steel screws on a white background", "a black and white illustration of a coffee mug on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small barn", "a 3d model of a small house and barn"], "question": "which house has a blue roof", "label": 1}, {"captions": [" a wooden door with a lock, handle, and a piece of paper on it.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["78762b19b7dc4823a0033ec63f092ca5", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["door, lock, handle", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a black and white image of a door with a crack in it", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": [" a house with a roof, roof truss, and suspended ceiling structure.", " a spider-like creature with long arms and legs."], "sample_ids": ["5abf69f79b92484fb54d41ff0c0a2c11", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["roof, truss, suspended ceiling", "arachnid, leg, arm"], "captions_pred_pc": ["a suitcase made of dots on a white background a suitcase made of dots on a white background royalty free illustration", "a black and white illustration of a spider on a white background"], "captions_pred_image": ["a 3d model of a house with roof trusses", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": [" a red and 
black jellyfish, flower, crab, and bomb.", " a building with blue metal framing and structure."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "ce40210c2a7e49dfaebbd934ccec4eca"], "properties": ["color, shape, and size", "color, blue, structure"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a black and white image of dots on a white background"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a building under construction"], "question": "which entity is a building?", "label": 1}, {"captions": [" a red and brown rock formation with a broken tree branch.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["c29d48d320c04ed1bf5aafe0a3df3d78", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["color, shape, texture", "house, tree, hill"], "captions_pred_pc": ["a black and white silhouette of an island on a white background", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d image of a rock formation on a snowy surface", "a 3d model of a house in the middle of a field"], "question": "which entity is a house?", "label": 1}, {"captions": [" a small house with a pond and situated on a rock.", "a featuring a boat, table, chairs, umbrella, and solar panel."], "sample_ids": ["92859eb82a344134806b37cc209927c6", "0f0eb3a198d341d28f809b6d7634be8a"], "properties": ["house, rock, pond", "boat, table, chairs, umbrella, solar panel"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a toaster", "a black and white illustration of a boat with an umbrella"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a boat, a table, chairs, and an umbrella"], "question": "which entity has a solar panel", "label": 1}, {"captions": [" a truck with luggage on top and various parts.", " various vehicles, including a car, truck, monster truck, and military 
vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["a6d5c3b54ebd4d17ba73f86d1527793c", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["Parts, Luggage, Truck", "lizard, rock, stuffed animal"], "captions_pred_pc": ["for a black and white image of a person holding a paintbrush", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small vehicle with a trailer attached to it royalty free 3d model preview no.2", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "f6c6e7a65a3e42dfa431b1d984c72f28"], "properties": ["roof, color, blue", "house, fence, dog"], "captions_pred_pc": ["of a white lace clutch purse on a white background", "above a black and white drawing of a bathroom"], "captions_pred_image": ["a 3d model of a building with many windows", "a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model 
of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a"], "question": "which house has a fence?", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " a house with a wooden-framed roof structure."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["- color is red, blue, pink", "roof, material, wood"], "captions_pred_pc": ["for a black and white image of an object on a white background", "a black and white drawing of a staircase"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a 3d model of a building with a roof"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": ["a 3d white box with an open door and lid.", " a clay pot with holes in it."], "sample_ids": ["4e95f0eca97f48d6af1888a8bacec9f6", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["- color is white - shape is box - material is plastic", "hole, material, clay"], "captions_pred_pc": ["a black and white square with dots all over it", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white box with an open lid", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": [" a white and yellow metal shelving unit with a steel structure, yellow legs, and suspended ceiling system.", " of a metal tool with a yellow handle, a laptop, and a ceiling light fixture."], "sample_ids": ["c1a7d264b34841409009b3d5d39d5b99", "b714bf13e9e54acb867c2c1b3ccf8ae8"], "properties": ["Steel, Color, Yellow", "metal, laptop, light fixture"], "captions_pred_pc": ["a black and white illustration of a building", "for 
a black and white image of a corner shelf"], "captions_pred_image": ["a 3d model of a table with multiple tables and chairs", "a 3d model of a telescope on a stand"], "question": "which entity is made of metal", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", " of a house with a pink roof."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "6162909df6294848a8eea83c3aa9585b"], "properties": ["frame, roof, trusses", "color, roof, pink"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white drawing of the letter 'p' on a white background illustration"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a house in the style of the 1920s and 1930s"], "question": "which house has a roof that is pink", "label": 1}, {"captions": [" a house featuring a roof, floor plan, heating system, and ceiling light fixture.", " a small triangular-shaped object."], "sample_ids": ["269939560e08432f9e134309b9a1c587", "2d02985030804209a26c2c53b96a06f9"], "properties": ["floor plan, heating system, ceiling light fixture", "shape, triangle, small"], "captions_pred_pc": ["a black and white drawing of a house", "a black and white image of a piece of metal"], "captions_pred_image": ["a 3d model of a building with a glass facade", "a black piece of furniture on a white background"], "question": "which object is a triangle?", "label": 1}, {"captions": [" of a computer with a green screen, keyboard, and white box.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["fb1e5a04ef4644f98219e1d5d52ab073", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["screen, keyboard, box", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["a box made up of many small dots on a white background a box made up of many small dots on a white background royalty free illustration", "a black and white image of a 
circular object on a white background"], "captions_pred_image": ["a 3d model of a vintage computer royalty-free 3d model preview", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a black and white coffee table with a laptop, featuring a performance studio and ceiling grid structure.", "a 3d rendered coffee table with a black and brown base and a square ceiling light."], "sample_ids": ["a177693cc8c7428292680816001b48c6", "27a365f067004d9c9c58e40c12827ce0"], "properties": ["black, white, laptop", "baseColor, black, brown, tableTopColor, brown, black"], "captions_pred_pc": ["a black and white drawing of a room with dots on the floor", "a black and white image of a patterned square on a white background"], "captions_pred_image": ["a dishwasher with a dish inside it", "a 3d model of a coffee table"], "question": "which table has a brown and black top", "label": 1}, {"captions": [" three differently colored wooden cubes with holographic patterns and a light inside one cube.", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], "sample_ids": ["1be0870f6d324089a3e3b60a029df6f8", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["color, material, shape", "building, plane, room"], "captions_pred_pc": ["of a set of 3 black ceramic tiles on a white background", "a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of three cubes on a white background", "a 3d model of a box with a lot of items inside"], "question": "which entity has more rooms", "label": 1}, {"captions": ["a 3d-printed green plastic cylinder with a hole in the middle.", "a white of a woman with her arms outstretched."], "sample_ids": ["9faa0c251d394f368f4f537ea21f977f", 
"2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["color, material, shape", "image, color, white"], "captions_pred_pc": ["a black and white image of a glittering object", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a 3d model of a woman with her arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": ["s of a rock, boat, plane, and leaf on a stick.", " a house with a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["be0884a7ced34b3d92687b6087798a1e", "c8936ace72954650b4e2d84246964849"], "properties": ["s, stick, leaf", "roof, color, pink"], "captions_pred_pc": ["above a black and white drawing of an object floating in the sky", "a black and white drawing of a toilet"], "captions_pred_image": ["a black and white photograph of a rock on a sandy surface", "a 3d model of a house with a roof"], "question": "which entity has a roof that is the color pink?", "label": 1}, {"captions": ["a 3d yellow robot with \"keep out\" written on it.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["663b8e6d4321490c92a4521bfd1ee763", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["color, shape, text", "roof, color, yellow"], "captions_pred_pc": ["a black and white drawing of a cell phone on a white background royalty free illustration", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a robotic arm royalty free 3d model preview no 3", "a 3d model of a house with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" a black metal shelf with four holes and a laptop on it.", " a small black house with a green roof, resembling a shed or container."], 
"sample_ids": ["b3b6f91d939d4193a0090eaabd39eb47", "bdb8e4c36ccb477890fd6ae569ae305c"], "properties": ["black, laptop, shelf", "black, roof, green"], "captions_pred_pc": ["a close up of a black and white tile on a white background", "a black and white drawing of a square with dots all over it"], "captions_pred_image": ["a 3d rendering of a black metal shelf", "a 3d model of a small black building"], "question": "which entity is a building?", "label": 1}, {"captions": [" a small house with a roof and ceiling-mounted air conditioner.", " a flying bird, resembling a crow and a pigeon."], "sample_ids": ["6965067ea9e34357a7af21a2f078fbac", "5ec78c8b6ab54f739adb0b46d216a454"], "properties": ["roof, air conditioner, house", "bird, resembles, crow, pigeon"], "captions_pred_pc": ["a black and white illustration of a window", "above a black and white illustration of an airplane on a white background"], "captions_pred_image": ["a 3d rendering of a small house with a covered porch", "a black and white image of a bird in flight"], "question": "which entity is a bird?", "label": 1}, {"captions": [" a red circular object with a checkered pattern, resembling a round pillow or bed cover.", " of two rocks with ice elements."], "sample_ids": ["9cf9fb6d07084488892422a5a5be00ef", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["pattern, color, shape", "image is a rock with ice elements"], "captions_pred_pc": ["a black circle on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a round cushion royalty-free 3d model preview", "a 3d image of two rocks on a gray surface"], "question": "which entity is a rock?", "label": 1}, {"captions": ["a 3d collection featuring a cash register, destroyed car, pos machine with credit card machine, broken cell phone, black and blue phone, atm machine, crocodile's head, and broken roof.", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken 
window."], "sample_ids": ["d9681d1f6fad42ab8d498cba24339ca8", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["pos machine, credit card machine, cash register", "room, furniture, window"], "captions_pred_pc": ["a black and white illustration of a glass bottle", "above a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a vintage cash register sitting on top of a table", "a 3d image of a room with a person in it"], "question": "which entity has a damaged window", "label": 1}, {"captions": [" a house with a pink roof, truss, and a square white ceiling lamp.", " a house featuring a roof with truss system, framing, insulation, and a ceiling light."], "sample_ids": ["91b2e9e4660946f5b4808a18b5323b69", "39876e69e3914d99a07e0dc59611c5c0"], "properties": ["roof, truss, lamp", "roof, truss system, framing"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white drawing of a window with dots all over it"], "captions_pred_image": ["a 3d model of a house with a metal roof", "a 3d model of the roof of a house"], "question": "which entity has a roof with truss system", "label": 1}, {"captions": [" a black and white object, possibly a car, arrow, or light fixture.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["ff2fc36f68c642b6940407bed2ee6917", "b896a0898efe4059a776193c02132129"], "properties": ["color, black, white", "- material is stone, metal, concrete"], "captions_pred_pc": ["for a black and white image of a pair of scissors", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of an airplane on a white background", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": ["a 3d low-poly helmet model with green and beige colors and wings.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring 
a statue and a fireplace with a shelf above it."], "sample_ids": ["b9f40c80d70e432390780273137dcbc0", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["color, material, texture", "throne, stairs, tree"], "captions_pred_pc": ["a black and white image of a spider's head", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a futuristic helmet with wings", "a 3d model of a throne with a tree on it"], "question": "which entity has a throne", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["a knife, blade, handle", "box, handle, gun"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "a black and white square with dots on a white background"], "captions_pred_image": ["a piece of white plastic on a gray background", "a 3d rendering of a metal box with a handle"], "question": "which entity has a handle", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a wooden table and bench with a deer head and branch on it."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "857d5391612349f4ae6cd854a1ec96de"], "properties": ["resembles, octopus, squid, spider, robot", "table, bench, deer"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "a black and white drawing of a table and chairs"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a black and white image of a bench and table with a deer's head on the table"], "question": "which entity is a table?", "label": 1}, {"captions": ["a 3d object featuring a flat roof with blue and white pattern, a purple and white 
bench, a blue and purple striped runner, a bed with blue and purple stripes, blue and white striped paper, a bench with a blue and purple pattern, and a ceiling light fixture with wooden slats.", " a large metal building with a roof and truss structure."], "sample_ids": ["28d0cf71ed684dcb87b2c9b2744c3633", "b85a99699ccd4bcba213322113bb253d"], "properties": ["runner, bed, bench", "roof, truss, structure"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a creative black and white drawing of a snowflake on a white background royalty free illustration", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a long, curved structure", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a small white house with stairs and a wall-mounted shelf.", " of a small wooden house with a roof."], "sample_ids": ["10c4ba5b0db4490db9c00c21c94cb41f", "f5904a9d87ff4fa688146c18c1f27fec"], "properties": ["house, color, white", "roof, house, wooden"], "captions_pred_pc": ["above a black and white drawing of a bench", "a black and white drawing of a house with dots"], "captions_pred_image": ["a 3d model of a small white building", "a 3d model of a small house"], "question": "which house is made of wood", "label": 1}, {"captions": [" a small white bookcase-like building with stairs and a light fixture.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["5f99eb9d1f1e4d57b5690446f832c841", "97e000ff41094665afd94ea565da8b13"], "properties": ["building, color, white", "roof, material, wood"], "captions_pred_pc": ["in 15 words or less a black and white image of the letter 'f' made up of dots", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a bookshelf on a white background", "a 3d model of the roof of a building"], "question": "which building is made of wood", "label": 1}, {"captions": 
["a featuring a green frog face, bunny head, dragon head, flower, monster with a tail, and a monster mask with an open mouth.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["127753bf17de4252aaa7ea88f274545e", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["face, mask, tail", "a, material, clay"], "captions_pred_pc": ["a 3d model of an orchid flower on a white background 3d model of an orchid flower on a white background royalty free illustration", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a goat's head", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", " of a white human skull with broken bone elements."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "6ca0f91b85464d7a845b3977351dd0b5"], "properties": ["color, red, blue, structure", "color, white, skull"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "a black and white image of a cat's x-ray"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d model of a human skull in white"], "question": "which entity is white", "label": 1}, {"captions": [" a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it.", "a white of a man with arms outstretched."], "sample_ids": ["93fb4197f0014f7582029af24c7ed9de", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["throne, stairs, tree", "image, color, white"], "captions_pred_pc": ["in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d model of a throne with a tree on it", "a 3d model of a man with his arms outstretched"], "question": "which image is white", "label": 1}, 
{"captions": ["a featuring a flying object, hand holding a rock, floating paper, cityscape, piece of wood, airborne fish, and street with buildings.", "two white spheres in a ."], "sample_ids": ["e04ad505d0c14dcbb593c49be7d04546", "1c02212a35134545ab63ab180d629c31"], "properties": ["a, hand, holding, rock, paper, floating, fish, street, buildings", "two, spheres, white, in, a"], "captions_pred_pc": ["a black and white illustration of a rock on a white background", "a black and white illustration of two spheres"], "captions_pred_image": ["a black and white image of a rock in the air", "a 3d model of a white ball on a gray background"], "question": "which object is in a?", "label": 0}, {"captions": [" of a white wall-mounted light switch, electrical outlet, and various furniture pieces.", " a small wooden house with a green roof."], "sample_ids": ["b195bf7ba6094e1b812e4312deeeb360", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["light switch, electrical outlet, furniture", "roof, color, green"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d rendering of a room with a white background", "a 3d model of a house with a ladder"], "question": "which entity has a roof that is the color of green", "label": 1}, {"captions": ["a black motorcycle helmet with a face mask and visor.", " a house with a wooden-framed roof structure."], "sample_ids": ["ad6df43a2ce24edfb15f5bb64755ed0d", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["color, black, visor", "roof, material, wood"], "captions_pred_pc": ["a black and white circular shape made up of many small dots on a white background a black and white circular shape made up of many small dots on a white background illustration", "a black and white drawing of a staircase"], "captions_pred_image": ["a black motorcycle helmet with a visor on top", "a 3d model of a building with a roof"], "question": "which entity 
is made of wood", "label": 1}, {"captions": [" a small house with a tree and a rock.", " a white building, table, and various piles of paper, including a low-poly object."], "sample_ids": ["9dc392a7f6e444e5bfb720684d6f864a", "515210fb031f4ec89021ee8ce9e432e9"], "properties": ["house, tree, rock", "- building is white, table is white, piles of paper are white"], "captions_pred_pc": ["in 15 words or less the image depicts a white square on a white background with black dots all over the square stock illustration", "a black and white drawing of a piece of paper"], "captions_pred_image": ["a 3d model of a small house with a tree in front of it", "a 3d model of a snowy landscape"], "question": "which entity has a white building?", "label": 1}, {"captions": [" a multi-level building with yellow stairs and columns.", " a small wooden house with a green roof."], "sample_ids": ["c1536f68727947ff9e7810799fac583a", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["level, stairs, columns", "roof, color, green"], "captions_pred_pc": ["in 15 words or less a black and white illustration of dots on a white background", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of an apartment building", "a 3d model of a house with a ladder"], "question": "which building has a green roof", "label": 1}, {"captions": [" of a house with a roof truss, chimney, and suspended ceiling.", " a large steel building with a pool."], "sample_ids": ["9401dfc901b2447a9c0eb27da56854d7", "72eed67d8d884c819b28e2e95eb48f06"], "properties": ["roof truss, chimney, suspended ceiling", "building material, pool, steel"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a 
bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a", "a black and white illustration of a building with a tree growing out of it"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a concrete structure"], "question": "which building material is used to build the pool", "label": 1}, {"captions": [" of a woman in a white dress", " of a broken stone wall featuring an angel sculpture."], "sample_ids": ["64d0c9f538204632b5c2b3e4fe959248", "aae2c42740a04fd68068f5707111d26f"], "properties": ["dress, color, white", "image is a sculpture of an angel on a wall"], "captions_pred_pc": ["a silhouette of a girl in a dress", "a black and white image of a toilet paper roll"], "captions_pred_image": ["a 3d sculpture of a woman in a dress walking", "a 3d model of a marble sculpture of an angel"], "question": "which entity is a sculpture?", "label": 1}, {"captions": ["a white 3d mannequin human head.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["0598fef648c8422f84410847fda77e6a", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["color, white, mannequin", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white image of a heart shaped ring", "a black and white image of a cone shaped object"], 
"captions_pred_image": ["a 3d model of a human head on a white background royalty free 3d model preview no.2", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a house with a roof structure, including a greenhouse.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["d7483292784b4e2b81df1c50f2a8664a", "a2354f13774340d392fbf33564934aab"], "properties": ["roof, structure, greenhouse", "building, roof, yellow"], "captions_pred_pc": ["a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration", "a black and white image of a cell phone"], "captions_pred_image": ["a 3d model of a building with a roof", "a 3d rendering of a machine with a conveyor belt"], "question": "which roof is yellow", "label": 1}, {"captions": [" a white table with grey legs, a white top, writing on it, and three legs.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["68e0d097351843a3980421f2ae624c59", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["white, top, writing", "island, mountain, grass"], "captions_pred_pc": ["a group of black dots on a white background stock illustration a group of black dots on a white background royalty free illustration", "a black and white map of the island of malta"], "captions_pred_image": ["a table with writing on it and a mouse on top of it", "a 3d image of a small island in the middle of a lake"], "question": "which entity has grass", "label": 1}, {"captions": ["a white teddy bear, cloud, skull, octopus, and bird in various positions on a gray background.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["dd5849aced0443b1b4b38d413f7e06c4", "74d87b126e9d4d65839f4117de835ca7"], "properties": 
["background, color, white", "lizard, rock, stuffed animal"], "captions_pred_pc": ["a black and white image of a cat's head", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of an animal skull in white on a gray background", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": ["a featuring a phone booth, desk, chair, lamp, ladder, and two additional chairs.", " a small white house with a roof."], "sample_ids": ["7da804ad2b554c9a9915d775afb015d3", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["desk, chair, lamp", "roof, color, white"], "captions_pred_pc": ["a black and white illustration of a city skyline", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d rendering of a desk and chair in a room", "a 3d model of a building with a white roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue arrow, exosphere label, england label, blue and purple stripes, and a blue flag.", " a small village featuring houses, trees, and a winding road."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "7acf46c0265d4e39b97ac084852abde8"], "properties": ["color, temperature, england", "houses, trees, road"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "in 15 words or less a black and white photo of a mountain landscape"], "captions_pred_image": ["a 3d model of the earth with a rainbow in the sky", "a black and white photograph of a small town"], "question": "which entity has a road?", "label": 1}, {"captions": [" a small building with stairs and a glass floor, featuring a square table and a black square ceiling light.", " a white sofa, chair, and box."], "sample_ids": 
["8aaad713b8834739b008ccf2f3d86cce", "4c59733ebd634594a921b7ace60e4142"], "properties": ["floor, table, light", "sofa, chair, box"], "captions_pred_pc": ["above a black and white photograph of a window", "a black and white drawing of a couch with dots"], "captions_pred_image": ["a black and white 3d model of a staircase on a platform", "a 3d model of a white chair"], "question": "which entity has a sofa", "label": 1}, {"captions": ["a wooden tower made of stacked blocks with holes in them, resembling a toy castle.", " a wooden roof structure with a pink roof."], "sample_ids": ["da8b5d21da9b4037982f29383d60b100", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["resembles, toy, castle", "roof, color, pink"], "captions_pred_pc": ["a black and white drawing of a pair of scissors", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of a tower made out of blocks", "a 3d model of the roof of a building"], "question": "which structure has a pink roof", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " a house with a blue roof."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "8ff693cd3ca74f8a901ca259b8b3a7ac"], "properties": ["apse, roof, floor plan", "roof, color, blue"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white drawing of a cross on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a house with a roof"], "question": "which roof is blue", "label": 1}, {"captions": ["a small yellow and black helicopter flying in the air.", " a robot with a blue, purple, and white body."], "sample_ids": ["7qxP6dQ5nNuaG8d0vswXXKnd0vq", "6f98acb9e03c4cbd9c83f2c8f9cd3ddc"], "properties": ["color, yellow, black", "body, color, white"], "captions_pred_pc": ["a black and white illustration of an airplane", "above a black and white image of a robot"], 
"captions_pred_image": ["a black and white helicopter flying in the air", "a 3d model of a robot standing in the middle of a white background"], "question": "which entity has a white body?", "label": 1}, {"captions": ["3d snowman model with a wooden stick.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["44f41039246a4df59027c38023d5a576", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["- material is wood - color is white - shape is 3d", "a, material, clay"], "captions_pred_pc": ["a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d snowman royalty-free 3d model preview", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" a white and yellow metal shelving unit with a steel structure, yellow legs, and suspended ceiling system.", " a small wooden house with a green 
roof, fire pit, wooden fence, bench, and a dog."], "sample_ids": ["c1a7d264b34841409009b3d5d39d5b99", "f6c6e7a65a3e42dfa431b1d984c72f28"], "properties": ["Steel, Color, Yellow", "house, fence, dog"], "captions_pred_pc": ["a black and white illustration of a building", "above a black and white drawing of a bathroom"], "captions_pred_image": ["a 3d model of a table with multiple tables and chairs", "a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " a potted plant on a wooden table or shelf."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "1f99b86478764fa7abd65785a53ebbe8"], "properties": ["ceiling, light, desks", "potted plant, table, shelf"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "a black and white illustration of a plant in a vase on a white background vector illustration of a black and white illustration of a plant in a vase on a white background royalty free illustration"], "captions_pred_image": ["a 
3d model of an office space with desks and chairs", "a black and white image of a plant in a pot"], "question": "which object is on a table?", "label": 1}, {"captions": ["a 3d blue star featuring various text, including \"prchen,\" \"bible chen,\" \"rib chicken,\" and \"birch chen.\"", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["0b712fc68ad44ce8a33592d2b26aac18", "09f2cf267e954c958828325067bcc36a"], "properties": ["color, shape, text", "island, terrain, rocks"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "above a black and white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a white paper airplane flying over a gray background", "a black and white image of a piece of dirt on the ground"], "question": "which entity is a rocky island?", "label": 1}, {"captions": [" a vibrant city skyline featuring various colored buildings, trees, and skyscrapers.", "a white of a woman with her arms outstretched."], "sample_ids": ["1a1fb9b0d83845f6b1238fb45e0defff", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["color, skyline, buildings", "image, color, white"], "captions_pred_pc": ["a black and white illustration of a city skyline", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a black and white 3d model of a city skyline", "a 3d model of a woman with her arms outstretched"], "question": "which image is white", "label": 1}, {"captions": [" of a black keyboard and ceiling light fixture.", " a black and white box-like object with various interpretations, such as a coffee table, building, book, and ceiling fixture."], "sample_ids": ["2378a9b64c054ddcaea234990a3bdba4", "404d7e2cd8894c31bdda02d2b3196464"], "properties": ["Black, keyboard, fixture", "black, white, coffee table"], "captions_pred_pc": ["a black and white image of a train on a white background", "a black and white drawing of a square with dots on it"], 
"captions_pred_image": ["3d model of a keyboard royalty free 3d model preview no.2", "a black and white 3d model of a building"], "question": "which object is black and white?", "label": 1}, {"captions": [" of a mushroom cloud with white tree and coral plant elements, featuring blue and green leaves.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["bca5233d878e4cf09b5bc2bb6f3915b0", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["color, shape, texture", "table, staircase, light"], "captions_pred_pc": ["a black and white image of a square with dots on it", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d model of a tree made out of white flowers", "a 3d model of a table with a staircase"], "question": "which entity has a square light?", "label": 1}, {"captions": ["3d silver sculpture resembling a triangular wing with a white arrow design.", "a white of a woman with her arms outstretched."], "sample_ids": ["578cb910905042939d876af28e29eb2f", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["wing, silver, white", "image, color, white"], "captions_pred_pc": ["a black and white illustration of a pair of hands", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a white paper airplane flying against a gray background", "a 3d model of a woman with her arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": [" a white hospital operating room with blue containers and medical equipment.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["a10f3196831647bc8842eacac640047d", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["color, white, containers", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white illustration of the letter 'f' made up of tiny dots", "a black and white image of a cone shaped object"], 
"captions_pred_image": ["a 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["- color is red, blue, pink", "color is white, yellow, plastic"], "captions_pred_pc": ["for a black and white image of an object on a white background", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a white plastic container with a label on it"], "question": "which entity is made of plastic", "label": 1}, {"captions": ["a 3d minecraft model of the acropolis and a city, featuring a desert building and a room with debris.", "a 3d minecraft model of the acropolis and a city, featuring a desert building and a room with debris."], "sample_ids": 
["3cbdd3ff48804f9a8041b890838613ec", "3cbdd3ff48804f9a8041b890838613ec"], "properties": ["acropolis, city, building", "acropolis, city, building"], "captions_pred_pc": ["a black and white drawing of a dotted square on a white background a black and white drawing of a dotted square on a white background royalty free illustration", "a black and white drawing of a dotted square on a white background a black and white drawing of a dotted square on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of the temple of olympian zeus in athens, greece royalty free 3d model preview", "a 3d model of the temple of olympian zeus in athens, greece royalty free 3d model preview"], "question": "which entity has a desert building?", "label": 0}, {"captions": [" of a destroyed building with a watercolor painting of a dilapidated house.", " a large building with a roof and windows."], "sample_ids": ["5a33f024faf145ac80cdadcdfef8a797", "32d1fbd3ee91426882290305f70021e6"], "properties": ["image, building, painting", "roof, windows, building"], "captions_pred_pc": ["above a black and white drawing of a building", "of a black and white photo of a diamond buckle"], "captions_pred_image": ["a black and white photograph of a damaged house", "a 3d model of an apartment building royalty free 3d model preview no.2"], "question": "which building has a roof and windows", "label": 1}, {"captions": [" a small bedroom with wooden floors, walls, roof, and shelf.", " a table with pillar-like yellow and white poles, featuring a wooden structure, large tent, glass roof, and a chandelier with numerous glass tubes."], "sample_ids": ["e602ac60041f4b4f84c044161e478781", "fa06167d83e54b05bdfbeeae2ca7c8a6"], "properties": ["floor, wall, roof", "table, structure, roof"], "captions_pred_pc": ["above a black and white image of a decorative metal bar", "a black and white image of a map with dots"], "captions_pred_image": ["a 3d model of a room with wooden walls and a rug on the 
floor", "a 3d model of a building with many pillars"], "question": "which entity has a wooden structure", "label": 1}, {"captions": [" an old castle in a grassy field.", " a pink-framed building structure with beams and trusses."], "sample_ids": ["26ea562f32d54afe919b73486dbf7d53", "18e392c5360146eda498c5edab25b15c"], "properties": ["field, grass, castle", "frame, beams, trusses"], "captions_pred_pc": ["above a black and white image of a castle in the middle of a field", "a black and white drawing of a metal grate"], "captions_pred_image": ["a black and white image of a broken column on a piece of paper", "a 3d model of a building under construction"], "question": "which structure has more beams", "label": 1}, {"captions": ["a purple and blue sword with gold accents.", " a white castle composed of small cubes."], "sample_ids": ["4b91f68e3f6d469d9e357d6ca48fc30c", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["color, material, pattern", "composed of, white, cubes"], "captions_pred_pc": ["a black and white image of a snowflake on a white background", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a sword with a long blade", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of small cubes", "label": 1}, {"captions": [" a large building with a roof and windows.", " a small white house with a roof."], "sample_ids": ["32d1fbd3ee91426882290305f70021e6", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["roof, windows, building", "roof, color, white"], "captions_pred_pc": ["of a black and white photo of a diamond buckle", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no.2", "a 3d model of a building with a white roof"], "question": "which building has a roof that is white?", "label": 1}, {"captions": [" of a round blue and brown concrete bowl with a logo.", " of a stone fountain 
with a lion statue, surrounded by an archway inspired by the arch of triumph."], "sample_ids": ["926ff028e22a4a628b76baba18e8d94e", "a72700696c3b44ef8101d1e71e914bc9"], "properties": ["color, shape, material", "lion, statue, fountain"], "captions_pred_pc": ["of a black bracelet with dots on it", "a black and white image of a metal object"], "captions_pred_image": ["a 3d rendering of a concrete bowl on a white background", "a 3d image of a lion statue on a wall"], "question": "which object is made of stone", "label": 1}, {"captions": [" a white building, table, and various piles of paper, including a low-poly object.", " a white building, table, and various piles of paper, including a low-poly object."], "sample_ids": ["515210fb031f4ec89021ee8ce9e432e9", "515210fb031f4ec89021ee8ce9e432e9"], "properties": ["- building is white, table is white, piles of paper are white", "- building is white, table is white, piles of paper are white"], "captions_pred_pc": ["a black and white drawing of a piece of paper", "a black and white drawing of a piece of paper"], "captions_pred_image": ["a 3d model of a snowy landscape", "a 3d model of a snowy landscape"], "question": "which entity has a white building", "label": 0}, {"captions": ["a 3d white cube featuring a hole, wheels, and a diamond.", " a large orange pumpkin."], "sample_ids": ["e44009d33258425e8efedfbc6823bf70", "684df453535b4ec28c4d5b64dcd60f59"], "properties": ["- color is white- shape is cube- material is plastic", "orange, large, pumpkin"], "captions_pred_pc": ["for a black and white image of a toothbrush in the shape of a toothbrush", "a black and white circular pattern on a white background a black and white circular pattern on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white cube", "a 3d model of a pumpkin on a white background"], "question": "which object is orange?", "label": 1}, {"captions": [" a yellow and white structure featuring lamp posts, bridge, poles, pier, 
stairway, and hanging rods.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["34257a26ad2e4c6d91ef6d5cd4bd7c43", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["color, pier, stairway", "roof, structure, greenhouse"], "captions_pred_pc": ["above a black and white illustration of an open door", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a long bench on a white background royalty free 3d model no.", "a 3d model of a building with a roof"], "question": "which structure has a roof structure", "label": 1}, {"captions": [" a building featuring graffiti, chinese writing, a door, a broken window, and an interior bathroom with a sink.", " of a machine gun with additional items."], "sample_ids": ["1ee3df6f94ea4c329a9c5245634e34d5", "d6e472d088b647c4bf07105b0fba3dba"], "properties": ["graffiti, chinese writing, door", "gun, type, machine gun"], "captions_pred_pc": ["a black and white illustration of a bridge with dots", "a black and white illustration of an airplane in the shape of the letter 'a'"], "captions_pred_image": ["a black and white image of a bathroom with a sink and a toilet", "a 3d model of a submachine gun"], "question": "which object is more likely to be used in a war", "label": 1}, {"captions": [" of a colorful green, yellow, and blue bird.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["b8c3b9076fd14b0e934f2784d8de105a", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["color, bird, green, yellow, blue", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["above a black and white image of a bird in flight", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["3d model of a bird royalty-free 3d model preview", "a white 3d model of a city skyline"], "question": "which image shows a 
laptop?", "label": 1}, {"captions": ["a pixelated of a striped brown and white coffee mug.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["656fca269cb042e68b1fb5b629bfa873", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["mug, color, brown, white", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white illustration of a circle made up of many small dots", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a coffee cup with multiple layers", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["a knife, blade, handle", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a piece of white plastic on a gray background", "a 3d model of a woman's chest"], "question": "which entity has a pattern", "label": 1}, {"captions": [" a white pendant floor lamp.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["073a3fb2e9bd42d1affed98ed2d64794", "b896a0898efe4059a776193c02132129"], "properties": ["height, lamp, shade", "- material is stone, metal, concrete"], "captions_pred_pc": ["of a black circle on a white background", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a light bulb on a pedestal royalty free 3d model preview no.", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a small 
white barn with a metal roof.", " a green mountain with trees and grass."], "sample_ids": ["4ca3342a96824684845f7d0e062ab176", "08fb23bdc67b4b0ba5fc64ea5c97e5f7"], "properties": ["roof, metal, white", "mountain, grass, tree"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a house made of dots", "in 15 words or less a black ink blot on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a barn", "a 3d model of a mountain with snow on it"], "question": "which entity has grass", "label": 1}, {"captions": [" a modern office building with a green door, green roof, windows, and blue lights.", "a featuring a chair, a building, a glass tower, and a throne made of money, with various unique elements such as a green roof and a computer chip."], "sample_ids": ["d6087023095446fbadef1721478373b2", "18d2e75f23474d7489a6d7d605dfc76d"], "properties": ["door, roof, window", "throne, chair, building"], "captions_pred_pc": ["a black and white drawing of a toilet brush", "a black and white illustration of a person sitting on a bench"], "captions_pred_image": ["a 3d model of an apartment building", "a 3d model of a building on top of a table"], "question": "which entity has a chair", "label": 1}, {"captions": [" a large, multi-floor building with columns, shelves, conveyor table, and a ceiling structure featuring pipes.", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["6d773d2b0ed9437ea2b9b352bd8a5c25", "09f2cf267e954c958828325067bcc36a"], "properties": ["building, floor, columns", "island, terrain, rocks"], "captions_pred_pc": ["in one line a black and white drawing of a dotted pattern on a white background", "above a black and white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d rendering of a large white table with multiple shelves", "a black and white image of a piece of dirt on the ground"], "question": "which entity has more rocks", 
"label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " a snowy city with buildings and a plane flying overhead."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "cc63ceb2b5e84872a1a1f6423de419e2"], "properties": ["apse, roof, floor plan", "building, plane, city"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white photo of an airplane on a white background"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a city in black and white"], "question": "which entity has a plane flying over it", "label": 1}, {"captions": [" a house with a roof structure, featuring a brick wall and suspended box.", " a building with a roof structure, featuring a wooden truss and ceiling with a light."], "sample_ids": ["1a7bfcf3755142bab90d3d7cb02d0f2c", "cb42ecb7a3fd4eba99f166150ecbc9a7"], "properties": ["roof, structure, wall", "roof structure, truss, ceiling"], "captions_pred_pc": ["a black and white illustration of a group of dots on a white background", "a black and white image of a stainless steel sculpture"], "captions_pred_image": ["a 3d model of a building with a roof", "a 3d model of a barn royalty free 3d model preview no 2"], "question": "which structure is more complex", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", " a wooden shed with a gray roof."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["frame, roof, trusses", "roof, color, gray"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a shed with a gray roof"], "question": "which building has a roof that is 
gray", "label": 1}, {"captions": [" a house featuring a detailed roof structure and a suspended ceiling with a map on it.", " a house with a yellow roof, wooden beams, and yellow frames."], "sample_ids": ["ee7e6031912b46bc8ca7205a959c5c16", "703dce44052e48cfb024bceb08141554"], "properties": ["roof, structure, suspended", "roof, beams, frames"], "captions_pred_pc": ["a black and white image of a piece of lace", "a black and white drawing of a boat"], "captions_pred_image": ["a 3d model of a house with a metal roof", "a 3d model of a wooden structure"], "question": "which house has a yellow roof", "label": 1}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", " a wooden shed with a gray roof."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["color, red, blue, structure", "roof, color, gray"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d model of a shed with a gray roof"], "question": "which structure has a roof that is gray?", "label": 1}, {"captions": [" a house featuring a wooden roof truss structure and ceiling with wood beams.", " a large white and metal building with a metal roof structure."], "sample_ids": ["990f06da2ba4488da8371f68da6b4523", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["roof truss, beams, structure", "roof, metal, white"], "captions_pred_pc": ["a black and white illustration of a staircase in the shape of the letter 'l'", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a house with a wooden roof", "a 3d model of a large white box"], "question": "which building has a metal roof", "label": 1}, {"captions": 
["white pendant light fixture ()", " a futuristic, small spaceship with two propellers, resembling a jet fighter."], "sample_ids": ["a7ce6d03c06d4c32bb507d7f1ee3c971", "832a022cdcc74763b0571e04af4e592b"], "properties": ["color is white, material is metal, light source is incandescent", "resembles a jet fighter, futuristic, small"], "captions_pred_pc": ["in 15 words or less a black and white illustration of the letter 'p'", "a black and white illustration of a person's hand holding a pencil"], "captions_pred_image": ["a white hanging light fixture with a white shade", "a lego model of a futuristic airplane"], "question": "which entity is not a spaceship?", "label": 1}, {"captions": [" featuring a chair, table, and refrigerator.", " a small white house with a roof."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["chair, table, refrigerator", "roof, color, white"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "a 3d model of a building with a white roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a cityscape featuring various buildings, trees, an airport runway, and a plane.", " a multicolored cube representing a protein, featuring pink, yellow, red, and green hues."], "sample_ids": ["9b45036d3f0342a78db9a25938dc77fc", "ee7c3113f2754f9cbe8980b1b7cc4eff"], "properties": ["building, tree, plane", "color, shape, color"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake in the shape of a snowflake royalty free illustration", "a black and white pattern of small dots on a white background a black and white pattern of small dots on a white background illustration"], "captions_pred_image": ["a black and white image of an industrial area with buildings and trucks in the foreground", "a 3d 
model of a piece of fabric"], "question": "which entity is a cube?", "label": 1}, {"captions": ["pink and green pendant light hanging from a ceiling.", " of a small house featuring a flat, brown roof, table with two chairs and a stool, ceiling light, and a window."], "sample_ids": ["1651a898288149edb8cbff0e1b2d692b", "3a509431d96b43f8a7aebe2846f08b96"], "properties": ["color, pink, green", "roof, brown, flat"], "captions_pred_pc": ["above a black and white photo of a small circular object on a white background", "a black and white drawing of a snowflake on a white background a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a white pendant light hanging from the ceiling", "a 3d rendering of a table and stool"], "question": "which entity has a brown roof", "label": 1}, {"captions": [" of a machine gun.", " of a clear glass display case with a square light fixture."], "sample_ids": ["2332041c12f640e6a9ce432e6e278455", "980ded888795417f94b46750105e1597"], "properties": ["gun, barrel, caliber", "light, fixture, square"], "captions_pred_pc": ["a silhouette of a machine gun on a white background", "in 15 words or less a black and white illustration of a room with dots on the floor"], "captions_pred_image": ["a 3d model of a machine gun royalty free 3d model preview no 2", "a 3d model of a clear plastic box"], "question": "which object has a square light fixture", "label": 1}, {"captions": ["a white 3d printed mickey mouse dice with various numbers and symbols on it.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["e2645ac544844f3c981203134a99c30c", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["- material is plastic- shape is dice- color is white", "house, roof, wooden"], "captions_pred_pc": ["a circle of dots with the number 2 in the center", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d printed white 
dice with a mickey mouse face", "a black and white photograph of a birdhouse"], "question": "which object is made of wood", "label": 1}, {"captions": [" a house featuring a roof, floor plan, heating system, and ceiling light fixture.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["269939560e08432f9e134309b9a1c587", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["floor plan, heating system, ceiling light fixture", "roof, structure, greenhouse"], "captions_pred_pc": ["a black and white drawing of a house", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building with a glass facade", "a 3d model of a building with a roof"], "question": "which house has a roof structure", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["color, material, structure", "hat, skull, bread"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a black and white image of a stone sculpture"], "question": "which entity has a skull with a blue hat", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", " of a cherry blossom bonsai tree with pink flowers."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "037fff0f153c41ea8b9c9392c2e2439a"], "properties": ["hat, sword, gun", "flower, color, pink"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in 
one hand and a cup of coffee in the other", "for a black and white illustration of a person on a skateboard"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d model of a bonsai tree on a pedestal"], "question": "which entity has a pink flower", "label": 1}, {"captions": [" a potted plant on a wooden table or shelf.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["1f99b86478764fa7abd65785a53ebbe8", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["potted plant, table, shelf", "house, tree, hill"], "captions_pred_pc": ["a black and white illustration of a plant in a vase on a white background vector illustration of a black and white illustration of a plant in a vase on a white background royalty free illustration", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a black and white image of a plant in a pot", "a 3d model of a house in the middle of a field"], "question": "which entity is situated on a hill", "label": 1}, {"captions": [" of a pink butterfly with floral elements.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["4a57365a85194a188ac1bdaf5fe1c398", "b896a0898efe4059a776193c02132129"], "properties": ["color, shape, pattern", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white image of a knife on a white background", "of a white candle in the shape of a buddha"], "captions_pred_image": ["three petals of a flower laying on a gray surface", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a yellow and white structure featuring lamp posts, bridge, poles, pier, stairway, and hanging rods.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": 
["34257a26ad2e4c6d91ef6d5cd4bd7c43", "b896a0898efe4059a776193c02132129"], "properties": ["color, pier, stairway", "- material is stone, metal, concrete"], "captions_pred_pc": ["above a black and white illustration of an open door", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a long bench on a white background royalty free 3d model no.", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": ["a featuring a white dog, white wolf, white and pink cats, and a pink fish, all with pink eyes.", " a small white house with a roof."], "sample_ids": ["5392e72784be41e485bf2d43cf0bee6a", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["color, white, pink, eyes", "roof, color, white"], "captions_pred_pc": ["a black and white image of a bear's head", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a wolf's head on a white background", "a 3d model of a building with a white roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a brick wall with grass."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "53f2d948091f417cb580e22469c94db2"], "properties": ["color, shape, and size", "brick, grass, wall"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "above a black and white illustration of an underwater scene"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a black and white photo of a brick wall and a puddle"], "question": "which entity is a wall?", "label": 1}, {"captions": [" a basket containing various objects, including balls, eggs, a stuffed animal, and a baby, as well as a ring and spheres with wire mesh.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["d795985a8bbe480282e349ff85fb1ef2", "bf18bfd89efd43389781050230467d58"], 
"properties": ["Contains, Objects, Various", "Lights, number, five"], "captions_pred_pc": ["a black and white dots pattern on a white background", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d sculpture of a person sitting on a chair", "a white chandelier with five white shades"], "question": "which object has more lights", "label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", " of a person in a green outfit, resembling a cartoon character."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "839a873262a544c9906f3f5799ca4648"], "properties": ["color, light, jewels", "a, outfit, green"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background royalty free illustration", "a person with arms outstretched 3d illustration of a person with arms outstretched on a white background illustration"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a person jumping on a trampoline in slow motion"], "question": "which entity is a person?", "label": 1}, {"captions": ["a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen.", " a black and purple dragon with wings and purple eyes."], "sample_ids": ["c9b1c89380e947f58aa06eb56c93c6d8", "9fdaa7bf7dbe499482d10705cbe366d2"], "properties": ["- color is black and gold- shape is cylindrical- material is metal", "Eye color, Black, Purple"], "captions_pred_pc": ["a black and white image of a circular object on a white background", "a black and white illustration of an airplane on a white background"], "captions_pred_image": ["a 3d model of a black and white object on a gray background", "a 3d model of a dragon with wings"], "question": "which entity has black eyes", "label": 1}, {"captions": ["a 3d collection featuring a black and 
white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a house with a wooden-framed roof structure."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["color, white, black, white", "roof, material, wood"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of a building with a roof"], "question": "which object is made of wood", "label": 1}, {"captions": [" a house with a roof structure and toothbrushes.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["7632d1ba4e8144c19484c263b6074d0c", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["house, roof, toothbrushes", "door, lock, handle"], "captions_pred_pc": ["a black and white illustration of the letter 'b' isolated on a white background illustration", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white box with a lot of blades", "a black and white image of a door with a crack in it"], "question": "which door has a lock", "label": 1}, {"captions": [" a house with a blue roof.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["8ff693cd3ca74f8a901ca259b8b3a7ac", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["roof, color, blue", "roof, trusses, beams"], "captions_pred_pc": ["a black and white drawing of a cross on a white background royalty free illustration", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a roof structure"], "question": "which roof is made of wood", "label": 1}, {"captions": [" 
a pink and green cube and chair.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["c6faf508072b4d0ea014bd9c748b79c3", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["color, chair, pink, green", "camera, speaker, ceiling fan"], "captions_pred_pc": ["above a black and white image of a curved line made up of dots", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a simple white chair", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" of a character wearing glasses and a hat.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["032e7ce682ff43d5aa5ca4fd34eacf14", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["hat, glasses, character", "hat, skull, bread"], "captions_pred_pc": ["in 15 words or less a silhouette of a bell on a white background royalty free illustration", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d model of a person wearing sunglasses", "a black and white image of a stone sculpture"], "question": "which entity has a skull with a blue hat?", "label": 1}, {"captions": ["a featuring a sailboat with a hook, a bird suspended in the air, and a pair of scissors.", " of a wrecked plane, ship, and bird on a pile of rocks with grass."], "sample_ids": ["f57ae66555d34349aeadc38b33f8f267", "b0c703df20154bbf9fd8707c61137fc5"], "properties": ["a, bird, hook", "plane, ship, bird"], "captions_pred_pc": ["of a 3d scan of a person's torso and limbs", "a black and white watercolor map of the state of ohio"], "captions_pred_image": ["a black and white photo of a kite flying in the sky", "a black and white photograph of a pile of debris on the ground"], "question": "which entity has a bird on a pile of rocks?", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a 
liquid-like texture.", "a wooden-cased radio."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "c79d1be9b9a0478993bee32c63231a88"], "properties": ["texture, spikes, eyes", "case, material, wood"], "captions_pred_pc": ["a black and white drawing of a flower", "in 15 words or less a black and white drawing of a computer screen"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 3", "3d model of a vintage radio 3d model of a vintage radio"], "question": "which object is made of wood", "label": 1}, {"captions": [" a small wooden log cabin.", " a large, ancient stone building, resembling a roman structure and a medieval castle."], "sample_ids": ["dc05e20369e640609f0651ce66118669", "f96abfafd34040a4bb09f5e2973403e9"], "properties": ["size, material, log", "building, material, stone"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench in a park royalty free illustration", "a black and white drawing of the letter 'l' on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a log cabin 3d model of a log cabin royalty free 3d model preview no 3", "3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosseum 3d model of the colosse"], "question": "which building is made of stone", "label": 1}, {"captions": ["a low poly of a deer, antelope, llama, capybara, and kangaroo.", " a small house on an island 
with trees, shrubs, a pool, and a lake."], "sample_ids": ["8b4c2e3e76524d85a9395ea1169d953e", "c8331489fca44685bedfa1bdadf6ccb3"], "properties": ["low poly, llama, kangaroo", "house, lake, pool"], "captions_pred_pc": ["above a black and white image of an animal sculpture", "a black and white image of a pattern on a piece of paper"], "captions_pred_image": ["a 3d low poly animal standing on its hind legs", "a 3d model of a large building"], "question": "which entity has a pool", "label": 1}, {"captions": [" of a cherry blossom bonsai tree with pink flowers.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["037fff0f153c41ea8b9c9392c2e2439a", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["flower, color, pink", "roof, color, yellow"], "captions_pred_pc": ["for a black and white illustration of a person on a skateboard", "a black and white drawing of a room"], "captions_pred_image": ["a 3d model of a bonsai tree on a pedestal", "a 3d model of a table and chairs on a white background"], "question": "which entity has a roof that is yellow", "label": 1}, {"captions": [" of a gray toolbox with an orange handle, filled with various tools.", "a 3d object featuring a white tray with a decorative pattern, a silver tray with a bird, a laptop, a long knife, and a metal bar."], "sample_ids": ["40c34834c6364e00b0157eb87914fc51", "b1099ba41d9f4af19d1a91761bb6074c"], "properties": ["color, handle, gray", "Object, Tray, Tray"], "captions_pred_pc": ["a black and white image of a toothbrush and toothbrush holder", "above a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a toothbrush kit royalty free 3d model no.", "a 3d image of a white tray with an intricate design"], "question": "which object has a tray?", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " a colorful, wire-framed building 
structure resembling a cube."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "62b7c7c684044d998fee9ff35beeb79b"], "properties": ["apse, roof, floor plan", "color, frame, shape"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white illustration of a building made up of dots"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a building structure"], "question": "which building structure has a frame?", "label": 1}, {"captions": [" a small bedroom with wooden floors, walls, roof, and shelf.", " a house with a roof, wooden beams, and chimney."], "sample_ids": ["e602ac60041f4b4f84c044161e478781", "be1376023c274bdda995d54f3694157f"], "properties": ["floor, wall, roof", "roof, beams, chimney"], "captions_pred_pc": ["above a black and white image of a decorative metal bar", "a black and white drawing of a bathroom with a shower"], "captions_pred_image": ["a 3d model of a room with wooden walls and a rug on the floor", "a 3d model of a house with a roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a house featuring a roof, floor plan, heating system, and ceiling light fixture.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["269939560e08432f9e134309b9a1c587", "b896a0898efe4059a776193c02132129"], "properties": ["floor plan, heating system, ceiling light fixture", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white drawing of a house", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a building with a glass facade", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a small white building with a door, resembling a box-shaped house.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["1b5fe88d0ff149ae9d8b4eb455c5c90c", 
"6b745457e06840119058883b35f78f58"], "properties": ["shape is box, color is white, door is present", "roof, color, blue"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a white, open shelving unit", "a 3d model of a house with a steeple on top"], "question": "which building has a blue roof", "label": 1}, {"captions": [" a small village featuring houses, trees, and a winding road.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["houses, trees, road", "torso, breasts, pattern"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a black and white photograph of a small town", "a 3d model of a woman's chest"], "question": "which entity is a torso?", "label": 1}, {"captions": [" of a pile of metal, torn and shredded paper, rocks, and a decayed animal.", " a small wooden house."], "sample_ids": ["c117f1923cad4ecf9df61b6e3d633374", "4cb4dba1237443eb8dc299530fa12521"], "properties": ["a pile of metal, torn and shredded paper, rocks, and a decayed animal", "house, material, wood"], "captions_pred_pc": ["a black and white map of germany on a white background", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white 
background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots"], "captions_pred_image": ["a 3d model of a crumpled piece of paper on a white background", "a 3d model of a small cottage"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a black building.", " a house with a wooden-framed roof structure."], "sample_ids": ["88702656e9684e1ea1a01dc7075c00e0", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["color, black, building", "roof, material, wood"], "captions_pred_pc": ["a close up of a black and white rug on a white background", "a black and white drawing of a staircase"], "captions_pred_image": ["a black 3d model of a house on top of a blueprint", "a 3d model of a building with a roof"], "question": "which building is made of wood", "label": 1}, {"captions": [" a small white box with a green light.", " a small house on a hill in a field."], "sample_ids": ["a5e37b9c782c4340b4dea45fbe1c701a", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["color, white, light", "house, hill, field"], "captions_pred_pc": ["a 3d sculpture of a vase made of small black dots on a white background 3d sculpture of a vase made of small black dots on a white background royalty free illustration", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d rendering of a small white box", "a black and white image of a small house"], "question": "which object is 
in a field?", "label": 1}, {"captions": ["white hospital bed, medical cart, and bedside table with wheels.", " a large orange pumpkin."], "sample_ids": ["e3ef6fd54af440efa31984214dd6f11d", "684df453535b4ec28c4d5b64dcd60f59"], "properties": ["bedside table, wheels, white", "orange, large, pumpkin"], "captions_pred_pc": ["a black and white illustration of a stool on wheels", "a black and white circular pattern on a white background a black and white circular pattern on a white background royalty free illustration"], "captions_pred_image": ["a 3d illustration of a white medical trolley on wheels", "a 3d model of a pumpkin on a white background"], "question": "which object is orange?", "label": 1}, {"captions": [" a large building with a roof and windows.", " a yellow boat/submarine with a red arrow and light."], "sample_ids": ["32d1fbd3ee91426882290305f70021e6", "9b9c31fe4e6b4004a4cb34176f329c04"], "properties": ["roof, windows, building", "color, shape, light"], "captions_pred_pc": ["of a black and white photo of a diamond buckle", "a black and white image of a letter 'l' on a white background"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no.2", "a 3d rendering of a table with an object on it"], "question": "which entity is a boat?", "label": 1}, {"captions": [" a building featuring graffiti, chinese writing, a door, a broken window, and an interior bathroom with a sink.", " a room featuring a wall with a painting, a hole, and a door."], "sample_ids": ["1ee3df6f94ea4c329a9c5245634e34d5", "1d1328346a464d2482463d6d5288e934"], "properties": ["graffiti, chinese writing, door", "painting, door, wall"], "captions_pred_pc": ["a black and white illustration of a bridge with dots", "in one hundred words or less an illustration of an igloo on a white background stock illustration"], "captions_pred_image": ["a black and white image of a bathroom with a sink and a toilet", "a black and white photograph of a torn piece of paper in 
the shape of a bird"], "question": "which entity has a door with a painting on it?", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a potted christmas pine tree."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "460c8f3034a844159826fac3b8aa35a5"], "properties": ["roof truss, insulation, suspended ceiling", "a, color, green"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a black and white illustration of a snowflake on a white background"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a christmas tree in a vase"], "question": "which object is green", "label": 1}, {"captions": ["a collection of various bottles, including accutane, vitamin c with a smiley face, activespray, jolt 3d, active kids, pills, and active junior, all containing different types of medicine.", " of an egyptian sarcophagus."], "sample_ids": ["bf76d14fcac24579920aa326fa607a17", "70aa484af2ab44149a608dd81a6ff459"], "properties": ["accutane, vitamin c, activespray, active kids, pills, active junior", "sarcophagus, material, wood"], "captions_pred_pc": ["a black and white illustration of a microscopic image of a virus on a white background royalty free stock illustration", "a black and white circular pattern on a white background"], "captions_pred_image": ["a bottle of activated charcoal pills on a white background royalty free 3d model no.", "a black and white photograph of a sphere with egyptian hieroglyphics on it"], "question": "which object is made of wood", "label": 1}, {"captions": [" a small black house with a green roof, resembling a shed or container.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["bdb8e4c36ccb477890fd6ae569ae305c", "b896a0898efe4059a776193c02132129"], "properties": ["black, roof, green", "- material is stone, metal, 
concrete"], "captions_pred_pc": ["a black and white drawing of a square with dots all over it", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a small black building", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": ["yellow and white 3d corn on the cob model resembling a toothbrush.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["1fd0e7ffe26349da89815a6d4d6a189a", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["color, shape, material", "roof, color, yellow"], "captions_pred_pc": ["a black and white illustration of a circle made up of tiny dots", "a black and white drawing of a room"], "captions_pred_image": ["a 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a", "a 3d model of a table and chairs on a white background"], "question": "which entity has a roof that is yellow", "label": 1}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue arrow, exosphere label, england label, blue and purple stripes, and a blue flag.", " a small white house with stairs and a spiral staircase, featuring a white table and ceiling light."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "e9e1cc7fae22458197a61f43a9c355f4"], 
"properties": ["color, temperature, england", "house, staircase, table"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "above a black and white photograph of a dog in a frame"], "captions_pred_image": ["a 3d model of the earth with a rainbow in the sky", "a 3d model of a small house with a spiral staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a red and brown rock formation with a broken tree branch.", " a house with a wooden-framed roof structure."], "sample_ids": ["c29d48d320c04ed1bf5aafe0a3df3d78", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["color, shape, texture", "roof, material, wood"], "captions_pred_pc": ["a black and white silhouette of an island on a white background", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d image of a rock formation on a snowy surface", "a 3d model of a building with a roof"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", " a cartoon hippo standing on its hind legs with arms outstretched."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "f3edc84dc71649c1a11270437279255b"], "properties": ["texture, spikes, eyes", "hippo, hind, legs"], "captions_pred_pc": ["a black and white drawing of a flower", "above a black and white illustration of a frog"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 
3", "a 3d model of a hippopotamus standing on its hind legs"], "question": "which entity has hind legs", "label": 1}, {"captions": [" of a soldier with outstretched arms", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["4982a7018e6f4503b5708c714b733ab4", "c3a82df41875402285608ef13a55df57"], "properties": ["arms, outstretched, soldier", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white illustration of a bird's wing", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a soldier with his arms outstretched royalty-free 3d model preview", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": ["a white ornate airplane design.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["23ec5bf5dd154e4a9df3194da7b8267a", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["color, shape, material", "color is white, yellow, plastic"], "captions_pred_pc": ["of a black and white image of an ornate door knocker", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a 3d model of a decorative ornament on a white background", "a white plastic container with a label on it"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", " a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "b16fb21cda9a4a21a024df749c2304f4"], "properties": ["frame, roof, trusses", "roof, ceiling, hole"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white image of a square with dots on it"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a small house and a tree in the foreground"], "question": "which house 
has a roof with a hole?", "label": 1}, {"captions": [" the white text \"ekberkaslan\" with a row of white cubes and a numbered box.", " a spider-like creature with long arms and legs."], "sample_ids": ["87ee30d475f34b799c24bf7ef3a7b540", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["- color is white- shape is cubes- number is 1", "arachnid, leg, arm"], "captions_pred_pc": ["a close up of a black and white striped scarf", "a black and white illustration of a spider on a white background"], "captions_pred_image": ["a 3d image of the word ebercaskalan on a white background", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": ["a small blue plastic box with a lid and handle.", "a wooden-cased radio."], "sample_ids": ["3e5cc957c888491ba1a5773299d3daa9", "c79d1be9b9a0478993bee32c63231a88"], "properties": ["blue, lid, handle", "case, material, wood"], "captions_pred_pc": ["in 15 or fewer words a black and white illustration of a rectangular tray", "in 15 words or less a black and white drawing of a computer screen"], "captions_pred_image": ["a 3d rendering of a gray plastic container", "3d model of a vintage radio 3d model of a vintage radio"], "question": "which object is made of wood", "label": 1}, {"captions": [" a small house with a tree and a rock.", " a wooden staircase with a railing and table."], "sample_ids": ["9dc392a7f6e444e5bfb720684d6f864a", "956247bea850458199c651037d4b1d7f"], "properties": ["house, tree, rock", "railing, table, staircase"], "captions_pred_pc": ["in 15 words or less the image depicts a white square on a white background with black dots all over the square stock illustration", "above a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a small house with a tree in front of it", "a 3d model of a table with a staircase"], "question": "which entity has a table?", "label": 1}, {"captions": ["a featuring a fireplace, wooden bench, sled, log, castle, 
and cat.", " a futuristic white sports racing car."], "sample_ids": ["fe4cd6c3972940a5b7854ca1ebc8a5a3", "95f5d1518f1b4a638f4bc5e444a7e1a1"], "properties": ["fireplace, bench, log", "color, white, futuristic"], "captions_pred_pc": ["a black and white illustration of a snowflake", "| all images person 2019 3d model by person | person 2019 3d model by person"], "captions_pred_image": ["a 3d model of a fireplace with snow on the ground", "a 3d model of a futuristic sports car"], "question": "which object is white", "label": 1}, {"captions": ["a featuring a small room with a table, chair, laptop, and a white box containing a teddy bear and a piece of paper.", "a featuring a man, woman, robot, cat, and dog interacting in and around a house."], "sample_ids": ["24f7d0a06d494c26a1678d81b2b7b093", "92498f398e244020a867686729633610"], "properties": ["a, laptop, teddy bear", "a, house, cat, dog"], "captions_pred_pc": ["a black and white drawing of an umbrella on a white background", "above a black and white illustration of a dog sitting on top of a pile of rocks"], "captions_pred_image": ["a 3d rendering of a white room with various objects", "a 3d model of a person standing in front of a wall"], "question": "which entity has a house?", "label": 1}, {"captions": ["a featuring a chair, a building, a glass tower, and a throne made of money, with various unique elements such as a green roof and a computer chip.", " a small white house with a roof."], "sample_ids": ["18d2e75f23474d7489a6d7d605dfc76d", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["throne, chair, building", "roof, color, white"], "captions_pred_pc": ["a black and white illustration of a person sitting on a bench", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a building on top of a table", "a 3d model of a building with a white roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a white rock with green grass and moss on it.", " a 
house with a wooden-framed roof structure."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["color, grass, moss", "roof, material, wood"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a 3d model of a building with a roof"], "question": "which entity is made of wood", "label": 1}, {"captions": [" of a wooden cabinet with drawers and filing features.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["abbc90bbd5474f73b16482ccd10e07ec", "c3a82df41875402285608ef13a55df57"], "properties": ["Cabinet, Drawers, Filing", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d rendering of a white wooden chest of drawers", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " of an ancient stone bowl, wooden headpiece, and broken pottery pieces."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "d48b6ff03d6744eb921c41a4a05ff55d"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "bowl, pottery, headpiece"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "a black and white illustration of a circle made up of many small dots"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 
3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d", "a 3d model of a piece of ancient pottery"], "question": "which object is made of stone", "label": 1}, {"captions": [" a wooden staircase with a door and square ceiling panel.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["3fe7e366bf924a00bf4b06ded35fd392", "bded33af34104b9686b845dfd18309a9"], "properties": ["door, panel, staircase", "table, staircase, light"], "captions_pred_pc": ["above a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white 
photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d model of a staircase on a wooden floor", "a 3d model of a small table with a staircase"], "question": "which staircase is integrated into a table?", "label": 1}, {"captions": [" a red, blue, and green striped tower building.", " a row of houses featuring roof structures with green roofs and tiled roof slats."], "sample_ids": ["8668f9e9d1a64b86b31f260b8056cd19", "aef9b23a78a7450286a961cc13448d00"], "properties": ["color, red, blue, green", "roof, green, tiled"], "captions_pred_pc": ["a black and white drawing of a butterfly on a white background a black and white drawing of a butterfly on a white background royalty free illustration", "of a black and white photo of a decorative metal wall hanging"], "captions_pred_image": ["a 3d model of a pair of cylindrical towers with a staircase leading up to the top of one of the towers", "a 3d model of a set of stainless steel shelves"], "question": "which building has a green roof", "label": 1}, {"captions": ["a featuring a lamp, harp, white bowl, and white curved wall.", " of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom."], "sample_ids": ["55bcec23e1b34f0d9d748b4dcc3ea123", "f178fb523ad7421aaa90a92ee736ee00"], "properties": ["lamp, harp, bowl", "bedroom, bathroom, bed"], "captions_pred_pc": ["a black and white illustration of a curved line", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a harp in a white room", "a 3d model of a small room with a bed, desk, and chair"], "question": "which entity has a bed?", "label": 1}, {"captions": [" a floating small island with trees, grass, and a mountain.", " a small house with a roof and ceiling-mounted air conditioner."], "sample_ids": 
["95e0d4e2464b433dbb6c4d1d30e8150f", "6965067ea9e34357a7af21a2f078fbac"], "properties": ["island, mountain, grass", "roof, air conditioner, house"], "captions_pred_pc": ["a black and white map of the island of malta", "a black and white illustration of a window"], "captions_pred_image": ["a 3d image of a small island in the middle of a lake", "a 3d rendering of a small house with a covered porch"], "question": "which entity is not a floating small island with trees", "label": 1}, {"captions": ["a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier.", " of a white and brown sea shell with a hole and small pearls on it, resembling a sea urchin."], "sample_ids": ["ef8288c9fdfc4e0f9c1fe25d570a104e", "411c164757fc4de68dfecb35fa858223"], "properties": ["color is white, yellow, plastic", "resembles, sea urchin, shell"], "captions_pred_pc": ["a black and white image of a metal bowl with dots", "in 15 words or less a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white 
illustration of a dandelion on a"], "captions_pred_image": ["a white plastic container with a label on it", "a 3d model of a sea urchin"], "question": "which entity is a shell?", "label": 1}, {"captions": ["3d lego model of the white house with a garden and tennis racket.", " a small white table with stairs and a ladder, featuring a black and white kitchen hood and a black square ceiling light."], "sample_ids": ["349d70e3f3d24c72ad05d5ceeee291b6", "5565c16f297e405f9d5dbf0ebb623605"], "properties": ["a, model, white house", "table, stairs, ladder"], "captions_pred_pc": ["a black and white photo of a person sitting on top of a pile of rocks", "above a black and white photograph of a small square in the center of the image"], "captions_pred_image": ["a 3d model of the white house royalty free 3d model", "a 3d model of a table with a stool on top"], "question": "which object has stairs", "label": 1}, {"captions": [" of a red spider on a rock and a robot on a round ceiling with a ceiling light fixture.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["fee48e4d25934ebb85ef6888d517d398", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["red, rock, ceiling", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white image of a circular object with dots on it", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a robotic spider on a rocky surface royalty free 3d model preview no.2", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": ["a purple and yellow pixelated ethereum logo in pixel art style.", " an old lantern with moss on it."], "sample_ids": ["bcf111e592d64b6490003680cae9407f", "62925f26fd624310bd6b31136fe8e706"], "properties": ["color, style, pixel", "moss, lantern, old"], "captions_pred_pc": ["in 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 
words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 1", "in 15 words or less the image is a silhouette of a person standing on top of a pyramid made of dots"], "captions_pred_image": ["an image of an electronic device with the letter 'z' on it", "a black and white image of an old lantern"], "question": "which object has moss on it", "label": 1}, {"captions": ["a blue and white radio in the shape of a small suitcase.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["314cb57bed324d268c1205f5c7bf80ab", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["color, shape, material", "a room, a cake, a table"], "captions_pred_pc": ["of a black and white drawing of a purse", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a 3d model of an old-fashioned radio on a white background royalty-free 3d model preview no.2", "a 3d rendering of a white room with various items in it"], "question": "which entity has a room?", "label": 0}, {"captions": [" a building featuring graffiti, chinese writing, a door, a broken window, and an interior bathroom with a sink.", " a concrete wall with peeling paint and rusted metal features."], "sample_ids": ["1ee3df6f94ea4c329a9c5245634e34d5", "4376831ff557462dbacc4cce88a8cc86"], "properties": 
["graffiti, chinese writing, door", "paint, rust, concrete"], "captions_pred_pc": ["a black and white illustration of a bridge with dots", "above a black and white image of a shelf on a white background"], "captions_pred_image": ["a black and white image of a bathroom with a sink and a toilet", "a 3d model of a concrete wall"], "question": "which entity is more likely to have a door", "label": 1}, {"captions": [" a house with a purple roof and glass block structure.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["e8ac7de076e54f07ace1a0ead07f6f57", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["roof, color, purple", "roof, trusses, beams"], "captions_pred_pc": ["a black and white image of a fire hydrant on a white background fire hydrant on a white background royalty free illustration", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a 3d model of a building with a glass roof", "a 3d model of the roof of a building"], "question": "which roof is made of trusses", "label": 1}, {"captions": [" of a white round table with a red accent.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["fea62a865b4e40899d95785533818329", "b896a0898efe4059a776193c02132129"], "properties": ["color, white, red", "- material is stone, metal, concrete"], "captions_pred_pc": ["above a black and white photograph of a person hanging from the ceiling", "of a white candle in the shape of a buddha"], "captions_pred_image": ["an empty white plate on a gray background", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a large house/building structure with a roof.", " a large steel and metal structure with a pool and scaffolding system."], "sample_ids": ["82859e4c6d4e4bbea94b6252bef1d398", "5850d5c7223447db816081d50292fec0"], 
"properties": ["roof, structure, house", "structure, material, pool"], "captions_pred_pc": ["a black and white photograph of a metal sculpture", "a black and white drawing of a bridge with chains"], "captions_pred_image": ["a 3d model of a large white structure", "a 3d model of a large concrete structure"], "question": "which structure is made of metal", "label": 1}, {"captions": [" a black metal shelf with four holes and a laptop on it.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["b3b6f91d939d4193a0090eaabd39eb47", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["black, laptop, shelf", "torso, breasts, pattern"], "captions_pred_pc": ["a close up of a black and white tile on a white background", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d rendering of a black metal shelf", "a 3d model of a woman's chest"], "question": "which entity has a pattern", "label": 1}, {"captions": [" of a human skull", " a house with a wooden-framed roof structure."], "sample_ids": ["3550b1feb37745b6a6dbec510ccb740b", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["a, b, c", "roof, material, wood"], "captions_pred_pc": ["a hedgehog in the center of the image a hedgehog in the center of the image on a white background royalty free illustration", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a human skull royalty-free 3d model preview no.1", "a 3d model of a building with a roof"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog.", " a large metal building with a roof and truss structure."], "sample_ids": ["f6c6e7a65a3e42dfa431b1d984c72f28", "b85a99699ccd4bcba213322113bb253d"], "properties": ["house, fence, dog", "roof, truss, structure"], "captions_pred_pc": ["above a black and white drawing of a 
bathroom", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a", "a 3d model of a long metal fence"], "question": "which building has a roof and truss structure", "label": 1}, {"captions": [" a house featuring a purple roof and a suspended ceiling with a light fixture.", " a small white box with a green light."], "sample_ids": ["579b43057ef74bd08d06bdd3e8d973a0", "a5e37b9c782c4340b4dea45fbe1c701a"], "properties": ["roof, purple, suspended", "color, white, light"], "captions_pred_pc": ["a black and white image of a piece of paper with a pattern of dots on it", "a 3d sculpture of a vase made of small black dots on a white background 3d sculpture of a vase made of small black dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of the roof of a building royalty free 3d model no.", "a 3d rendering of a small white box"], "question": "which entity is white", "label": 1}, {"captions": ["a featuring a large flying ship, a mountain range with a central lake, and a small island resembling hawaii.", " of a meat skewer with a small piece of bread and a sausage on a 
stick."], "sample_ids": ["4d613d2057454e719bcae7f8cf05210a", "1728f2cb8eca4080af02b22262ff45d5"], "properties": ["a, island, resembles, hawaii", "meat, bread, sausage"], "captions_pred_pc": ["above a black and white image of a person's hand holding a pencil", "a black and white image of a brush on a white background"], "captions_pred_image": ["a 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a", "an image of a small white object on a gray background"], "question": "which entity is a food?", "label": 1}, {"captions": [" of a yellow and black wasp, available in 3ds max, obj, fbx, and c formats.", " a large building with a roof and windows."], "sample_ids": ["6a4b9e9fce7a4a2486fcdc45363f48c2", "32d1fbd3ee91426882290305f70021e6"], "properties": ["3ds max, obj, fbx, c", "roof, windows, building"], "captions_pred_pc": ["a black and white insect on a white background", "of a black and white photo of a diamond buckle"], "captions_pred_image": ["a 3d model of a black and white striped mosquito", "a 3d model of an apartment building royalty free 3d model preview no.2"], "question": 
"which entity has a roof", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a sword with a wooden handle."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "d42eec2b8e7644ec9ededcf9f325faad"], "properties": ["resembles, octopus, squid, spider, robot", "handle, material, wood"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "a black and white image of a knife on a white background"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a black and white image of a sword"], "question": "which object has a wooden handle", "label": 1}, {"captions": [" a white hospital operating room with blue containers and medical equipment.", " a white and black striped box resembling a stack of blocks or paper."], "sample_ids": ["a10f3196831647bc8842eacac640047d", "78246d66fd2e4e1195bc4536f4037862"], "properties": ["color, white, containers", "striped, white, black"], "captions_pred_pc": ["a black and white illustration of the letter 'f' made up of tiny dots", "a black and white illustration of a pair of shoes on a white background vector illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair 
of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a"], "captions_pred_image": ["a 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room", "a 3d model of a stack of books"], "question": "which object is white and black?", "label": 1}, {"captions": [" of a cup, bottle, and two metal buckets on a chessboard with a square ceiling light fixture.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], "sample_ids": ["05b5a5da1a0a4c1fa60a9e5edd5c3424", "d81d13362ae04371bb2cba46e4939665"], "properties": ["cup, bottle, chessboard", "hat, bow, arrow"], "captions_pred_pc": ["a black and white 3d shape made up of small dots on a white background", "above a black and white photo of an ice sculpture"], "captions_pred_image": ["a 3d model of a chess set on a checkered board royalty-free 3d model", "a sculpture of an african man sitting on a pedestal"], "question": "which entity has a hat?", "label": 1}, {"captions": [" a white and yellow 
table with yellow legs and a metal roof structure.", " a white and black striped box resembling a stack of blocks or paper."], "sample_ids": ["a71c43af3c944bf5b6d12375f7d54811", "78246d66fd2e4e1195bc4536f4037862"], "properties": ["color, white, yellow, roof, metal", "striped, white, black"], "captions_pred_pc": ["above a black and white drawing of a floor plan", "a black and white illustration of a pair of shoes on a white background vector illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a"], "captions_pred_image": ["a 3d model of a white table with multiple cubes", "a 3d model of a stack of books"], "question": "which object is white and black?", "label": 1}, {"captions": [" a small white house with stairs and a wall-mounted shelf.", "a black and white of a knife/sword with a handle."], "sample_ids": ["10c4ba5b0db4490db9c00c21c94cb41f", "c7692fa635e049bda0a2039fa5a784a4"], "properties": ["house, color, white", "image, color, black and white"], "captions_pred_pc": ["above a black and white 
drawing of a bench", "of a black and white knife on a white background"], "captions_pred_image": ["a 3d model of a small white building", "a black and white image of a knife"], "question": "which entity is not a color image?", "label": 1}, {"captions": [" a castle on an island with a small floating house, trees, and clouds.", " a multicolored cube representing a protein, featuring pink, yellow, red, and green hues."], "sample_ids": ["c4c09479570943e2845fbd4c6a450568", "ee7c3113f2754f9cbe8980b1b7cc4eff"], "properties": ["castle, island, house", "color, shape, color"], "captions_pred_pc": ["above a black and white illustration of a group of dots in the shape of a circle", "a black and white pattern of small dots on a white background a black and white pattern of small dots on a white background illustration"], "captions_pred_image": ["a 3d model of a small house on an island", "a 3d model of a piece of fabric"], "question": "which entity is a cube?", "label": 1}, {"captions": ["a 3d object featuring elements of a green and purple lamp, a cartoon character with a light bulb, an ice cream cone, a syringe, a light pole, a pen with a face, and a doctor's stethoscope.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["42d3657ce7bc4b5dbeb7f444e089a715", "06a1c233fb444830b577aa06e2c01294"], "properties": ["a lamp, a syringe, a light pole", "house, tree, hill"], "captions_pred_pc": ["a spoon with black dots on a white background spoon with black dots on a white background, 3d illustration royalty free stock photo", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of 
a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop", "a black and white image of a house in the middle of a field"], "question": "which entity has a house?", "label": 1}, {"captions": [" a small house with a blue roof, a door, and a pool.", " of a small house featuring a flat, brown roof, table with two chairs and a stool, ceiling light, and a window."], "sample_ids": ["40c52c2d278345c5b4e8d00a991271dc", "3a509431d96b43f8a7aebe2846f08b96"], "properties": ["door, roof, pool", "roof, brown, flat"], "captions_pred_pc": ["of a black and white photo of a window with fringes", "a black and white drawing of a snowflake on a white background a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small house", "a 3d rendering of a table and stool"], "question": "which house has a brown roof?", "label": 1}, {"captions": ["a 3d object collection featuring a long stick, a grassy hill, a fish, a green and yellow knife, a small green and purple fish, a bird, and a crocodile.", " a large, black and white circular building, resembling a stadium or ring structure."], "sample_ids": ["856e5f6a854d4c608901f1e2b580344c", "67f46bb0048244c687a58d1017a08f6b"], "properties": ["a, b, c", "building, color, black and white"], "captions_pred_pc": ["above a black and white drawing of a submarine", "the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the 
letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustr"], "captions_pred_image": ["a black and white photograph of a fish on a gray background", "a 3d model of a circular fence with black and white stripes"], "question": "which entity is a building?", "label": 1}, {"captions": ["pink and green pendant light hanging from a ceiling.", " of a beige and white round soap dish/small bowl."], "sample_ids": ["1651a898288149edb8cbff0e1b2d692b", "5414d75e47104589837f3df8b6de6d22"], "properties": ["color, pink, green", "beige, white, round"], "captions_pred_pc": ["above a black and white photo of a small circular object on a white background", "of a 3d model of a bracelet 3d model of a bracelet on a white background royalty free illustration 2019"], "captions_pred_image": ["a white pendant light hanging from the ceiling", "a white ceramic bowl sitting on top of a gray surface"], "question": "which object is round?", 
"label": 1}, {"captions": ["a featuring a boat, table, chairs, umbrella, and solar panel.", " of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom."], "sample_ids": ["0f0eb3a198d341d28f809b6d7634be8a", "f178fb523ad7421aaa90a92ee736ee00"], "properties": ["boat, table, chairs, umbrella, solar panel", "bedroom, bathroom, bed"], "captions_pred_pc": ["a black and white illustration of a boat with an umbrella", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a boat, a table, chairs, and an umbrella", "a 3d model of a small room with a bed, desk, and chair"], "question": "which entity has a bathroom?", "label": 1}, {"captions": ["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", " a tan, cylindrical hat with a hole in it."], "sample_ids": ["db19e46828c94b1a8b9a6ca9f673c604", "00c350e9f5f24fcd8bcc378da2963d4c"], "properties": ["a, roof, soup", "hat, color, tan"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "a black and white drawing of a mushroom on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a bowl and chopsticks on a sheet of paper", "a 3d model of an object with a hole in it"], "question": "which object has a hole in it", "label": 1}, {"captions": ["a featuring a mossy rock, a piece of wood, a rocky island, a plane, a tree branch, a gray rock, a rocky mountain, and a moss-covered tree branch.", " a spiral staircase with railings."], "sample_ids": ["09e5288a9e98421985ee6e0042b3c325", "7f24a859ed3d4b7c86f940d8cc11218f"], "properties": ["mossy, rock, rocky", "railings, staircase, spiral"], "captions_pred_pc": ["a black and white illustration of a small island in the middle of a body of water", "above a black and white drawing of a spiral staircase"], "captions_pred_image": ["a 3d model of the comet 
67p/churyumov-gerasimenko", "a 3d rendering of a staircase in a room"], "question": "which staircase has railings", "label": 1}, {"captions": ["a 3d wooden pole with a metal spear handle.", " a wooden billiard table with legs."], "sample_ids": ["1d5cf234576e41f0ba209c5e19d2db47", "b20ad62516fa467ba6e8de063998e8e4"], "properties": ["- material is wood, metal, metal", "legs, material, wood"], "captions_pred_pc": ["of a black and white illustration of a vase on a pedestal", "a black and white drawing of a rectangular shaped object"], "captions_pred_image": ["a black and white image of an old-fashioned pitchfork", "a black and white image of a pool table"], "question": "which object is made of wood", "label": 1}, {"captions": [" a house with a purple roof and glass block structure.", "a white of a small archway gate."], "sample_ids": ["e8ac7de076e54f07ace1a0ead07f6f57", "a48aae47988c4daa9531e33b1e3085f8"], "properties": ["roof, color, purple", "image, color, white"], "captions_pred_pc": ["a black and white image of a fire hydrant on a white background fire hydrant on a white background royalty free illustration", "above a black and white illustration of an arch"], "captions_pred_image": ["a 3d model of a building with a glass roof", "a 3d model of a white arch"], "question": "which image is white", "label": 1}, {"captions": ["a featuring a metal refrigerator, a graffiti-covered sink, and a metal toilet, all with rusted elements.", " a black triangular metal object with a clock and cross on it, and a small machine on top."], "sample_ids": ["e65de9c4ec9242679a45e74733f7d61d", "b198a81dc41c4fde8be3ca51c3b0e676"], "properties": ["rusty, sink, graffiti", "metal, cross, clock"], "captions_pred_pc": ["a black and white drawing of a room with a door and a rug on the floor a black and white drawing of a room with a door and a rug on the floor royalty free illustration", "above a black and white image of an object on a white background"], "captions_pred_image": ["a stainless 
steel toilet bowl on a pedestal", "a 3d model of a piece of furniture"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a wheeled hospital bed", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["40aa2ee15441439eb326c4abfcf4cd00", "c3a82df41875402285608ef13a55df57"], "properties": ["wheeled, bed, hospital", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white illustration of a rectangular shawl", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a hospital bed royalty-free 3d model - no 3", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " a wooden table and bench with a deer head and branch on it."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "857d5391612349f4ae6cd854a1ec96de"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "table, bench, deer"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "a black and white drawing of a table and chairs"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 
3d", "a black and white image of a bench and table with a deer's head on the table"], "question": "which entity has a deer head on it?", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a large rock structure with a cave and small hole, resembling a stone sculpture and featuring a white plastic bag."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "120bf1525e8649d9bdf3a593fe8f5ddc"], "properties": ["color, shape, and size", "resembles, sculpture, rock"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a black and white illustration of a piece of paper with dots on it"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a bag with a zipper on it"], "question": "which entity is a rock?", "label": 1}, {"captions": ["a featuring a red hat, floating cup, bowl filled with candy, and a strawberry.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": ["e27a9fd533dc41da9cf2eeb8fee2a5af", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["hat, candy, strawberry", "hat, candy, strawberry"], "captions_pred_pc": ["a black and white illustration of two spheres", "a black and white illustration of two spheres"], "captions_pred_image": ["a black and white image of a person wearing a hat", "a black and white image of a person wearing a hat"], "question": "which entity has a floating cup?", "label": 0}, {"captions": ["modern tan leather lounge chair.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], "sample_ids": ["409916a53a0d434599e2a3f52bfe9396", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["color, tan, leather", "building, room, sky"], "captions_pred_pc": ["a black and white illustration of an object in the shape of a butterfly on a white background 3d illustration of a black and white 
illustration of an object in the shape of a butterfly on a white background vector illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustration of an object in the shape of a butterfly on a white background illustration of a black and white illustr", "above a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a gray leather lounge chair with a metal base", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a room?", "label": 1}, {"captions": [" a small house with a pond and situated on a rock.", " a large steel building with a pool."], "sample_ids": ["92859eb82a344134806b37cc209927c6", "72eed67d8d884c819b28e2e95eb48f06"], "properties": ["house, rock, pond", "building material, pool, steel"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a toaster", "a black and white illustration of a building with a tree growing out of it"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a concrete structure"], "question": "which building is made of steel", "label": 1}, {"captions": [" a house with a roof structure and toothbrushes.", " a black and white striped box."], "sample_ids": 
["7632d1ba4e8144c19484c263b6074d0c", "00fa8accaaad44c780efe0c04ed4a12b"], "properties": ["house, roof, toothbrushes", "color, black, white"], "captions_pred_pc": ["a black and white illustration of the letter 'b' isolated on a white background illustration", "in 15 words or less a black and white pattern on a white background"], "captions_pred_image": ["a 3d rendering of a white box with a lot of blades", "a 3d image of a black and white striped surface"], "question": "which object is black and white?", "label": 1}, {"captions": [" a small white house with a staircase and a window.", "a featuring a boat, table, chairs, umbrella, and solar panel."], "sample_ids": ["9eb88d17310d42dda9e17883e9922525", "0f0eb3a198d341d28f809b6d7634be8a"], "properties": ["house, staircase, window", "boat, table, chairs, umbrella, solar panel"], "captions_pred_pc": ["a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration", "a black and white illustration of a boat with an umbrella"], "captions_pred_image": ["a 3d rendering of a small room with a staircase", "a 3d model of a boat, a table, chairs, and an umbrella"], "question": "which entity has a solar panel", "label": 1}, {"captions": ["a 3d object featuring a flat roof with blue and white pattern, a purple and white bench, a blue and purple striped runner, a bed with blue and purple stripes, blue and white striped paper, a bench with a blue and purple pattern, and a ceiling light fixture with wooden slats.", " a potted christmas pine tree."], "sample_ids": ["28d0cf71ed684dcb87b2c9b2744c3633", "460c8f3034a844159826fac3b8aa35a5"], "properties": ["runner, bed, bench", "a, color, green"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a creative black and white drawing of a snowflake on a white background royalty free illustration", "a black and white illustration of a snowflake on a white background"], 
"captions_pred_image": ["a 3d model of a long, curved structure", "a 3d model of a christmas tree in a vase"], "question": "which object is green", "label": 1}, {"captions": ["a purple and yellow pixelated ethereum logo in pixel art style.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["bcf111e592d64b6490003680cae9407f", "97e000ff41094665afd94ea565da8b13"], "properties": ["color, style, pixel", "roof, material, wood"], "captions_pred_pc": ["in 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 1", "a black and white drawing of a floor plan"], "captions_pred_image": ["an image of an electronic device with the letter 'z' on it", "a 3d model of the roof of a building"], "question": "which entity is not made of wood", "label": 1}, {"captions": [" of a small wooden house with two roofs.", " a large building with a roof and windows."], "sample_ids": ["30fc23ae4edb42609e30e029dede54bd", "32d1fbd3ee91426882290305f70021e6"], "properties": ["house, roof, wooden", "roof, windows, building"], "captions_pred_pc": ["of a pair of stainless steel screws on a white background", "of a black and white photo of a diamond buckle"], "captions_pred_image": ["a 3d model of a small barn", "a 3d model of an apartment building royalty free 3d model preview no.2"], "question": "which building has a roof and windows", 
"label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", " a woman in a red dress holding a tennis racket, wearing a hat."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "b89b19ddadd04d6799e90b611c889bae"], "properties": ["color, light, jewels", "hat, dress, racket"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background royalty free illustration", "a black and white illustration of a dendritic cell"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a black and white photograph of a woman holding a tennis racket"], "question": "which entity is a woman?", "label": 1}, {"captions": [" a small green cannon wheel with a handle.", "a white glass beer mug."], "sample_ids": ["97d13db38fa24556afa1eef04fc518e6", "1d686cbd3e9a4c629a43088658989286"], "properties": ["color, green, handle", "color, white, glass"], "captions_pred_pc": ["of a small black object on a white background", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a 3d model of a cannon with wheels", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": [" a yellow monster-sphere with teeth.", " a wooden billiard table with legs."], "sample_ids": ["fc5f906bab2c4c36a406ebdc15f58541", "b20ad62516fa467ba6e8de063998e8e4"], "properties": ["color, shape, texture", "legs, material, wood"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background dandelion illustration on a white background illustration", "a black and white drawing of a rectangular shaped object"], "captions_pred_image": ["a 3d model of a white ball with teeth", "a black and white image of a pool table"], "question": "which object has legs", "label": 1}, {"captions": [" a small house with a tree, pool, and pond in a green and blue landscape.", " a 
large steel and metal structure with a pool and scaffolding system."], "sample_ids": ["e22489ba182f45cb81f0a83f22abe9bd", "5850d5c7223447db816081d50292fec0"], "properties": ["house, tree, pool", "structure, material, pool"], "captions_pred_pc": ["in 15 words or less a black and white image of a square with dots around it", "a black and white drawing of a bridge with chains"], "captions_pred_image": ["a 3d model of a house and a tree in a box royalty free 3d model preview no.3", "a 3d model of a large concrete structure"], "question": "which structure has a pool", "label": 1}, {"captions": [" a red and brown rock formation with a broken tree branch.", " a wooden billiard table with legs."], "sample_ids": ["c29d48d320c04ed1bf5aafe0a3df3d78", "b20ad62516fa467ba6e8de063998e8e4"], "properties": ["color, shape, texture", "legs, material, wood"], "captions_pred_pc": ["a black and white silhouette of an island on a white background", "a black and white drawing of a rectangular shaped object"], "captions_pred_image": ["a 3d image of a rock formation on a snowy surface", "a black and white image of a pool table"], "question": "which object has legs", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", "a small white 3d boat model with a curved wing and knife."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "cafc467aff2643e8b70149c4944263ee"], "properties": ["texture, spikes, eyes", "wing, knife, boat"], "captions_pred_pc": ["a black and white drawing of a flower", "a black umbrella on a white background"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 
3", "a 3d model of a rowing boat royalty free 3d model preview no 3"], "question": "which entity has a wing?", "label": 1}, {"captions": [" a pyramid with blue and pink lines, wires, and mesh.", " a white castle composed of small cubes."], "sample_ids": ["a3b2db8d5c6044f88b275839d0cd71bd", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["color, shape, material", "composed of, white, cubes"], "captions_pred_pc": ["a black and white image of a patterned rug", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of the pyramid roof royalty-free 3d model preview no.1", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of cubes", "label": 1}, {"captions": [" of a wooden windmill with a red roof.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["2ad8fca30285483d8b7f602fa078215d", "bded33af34104b9686b845dfd18309a9"], "properties": ["roof, color, red", "table, staircase, light"], "captions_pred_pc": ["a white and black image of a snowflake in the shape of a snowflake", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d model of a windmill on a gray background", "a 3d model of a small table with a staircase"], "question": "which object has a staircase?", "label": 1}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " a small white building with stairs and shelves."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "9e1f64d4fd514059be934077717536dc"], "properties": ["roof, color, blue", "building, stairs, shelves"], "captions_pred_pc": ["of a white lace clutch purse on a white background", "a black and white image of a person standing in front of a white background"], "captions_pred_image": ["a 3d model of a building with many windows", "a white 3d model of a building with stairs"], "question": "which building has stairs and shelves", "label": 1}, {"captions": [" a castle 
on an island with a small floating house, trees, and clouds.", " a staircase with a railing, table, and chair, featuring a square ceiling light."], "sample_ids": ["c4c09479570943e2845fbd4c6a450568", "51a0fba79bce472a8b827b78a110b77f"], "properties": ["castle, island, house", "stair, table, chair"], "captions_pred_pc": ["above a black and white illustration of a group of dots in the shape of a circle", "for a black and white image of a toilet paper holder"], "captions_pred_image": ["a 3d model of a small house on an island", "a 3d model of a staircase in a room"], "question": "which entity has a table and chair?", "label": 1}, {"captions": ["red and yellow toy robot fireman with a helmet, wheels, and holding a fire extinguisher.", "star wars stormtrooper "], "sample_ids": ["e01a1919e9404ca7a4dd52c593649b62", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["color, wheel, helmet", "a, color, white"], "captions_pred_pc": ["for a black and white image of a pair of earrings", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a 3d model of a robot wearing a helmet and carrying a large object", "a 3d model of a star wars stormtrooper"], "question": "which entity is white", "label": 1}, {"captions": ["a featuring a white and blue structure with a table, blue blocks, and suspended blue cubes.", " a black and white striped box."], "sample_ids": ["1d2cfe3a03004b62b17d3ce065658302", "00fa8accaaad44c780efe0c04ed4a12b"], "properties": ["color, table, blocks", "color, black, white"], "captions_pred_pc": ["a group of people standing on top of each other on a white background a group of people standing on top of each other on a white background royalty free illustration", "in 15 words or less a black and white pattern on a white background"], "captions_pred_image": ["a 3d model of a table with four legs", "a 3d image of a black and white striped surface"], "question": "which object is black and white?", "label": 1}, {"captions": [" a house with a 
green roof and lawn.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "3cd410c4359a4cef98702963a2b9802b"], "properties": ["roof, green, lawn", "roof, trusses, ladder"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of the roof of a building"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "a2354f13774340d392fbf33564934aab"], "properties": ["base material is wood, color is red, message is welcome to northwich", "building, roof, yellow"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "a black and white image of a cell phone"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d rendering of a machine with a conveyor belt"], "question": "which building has a yellow roof", "label": 1}, {"captions": [" of a sword and knife.", " a small white building featuring a room with a door and a white shelf."], "sample_ids": ["8b567403ba614523a298f1c5b2009f92", "d7b78fa9a6b64f6095b881bc619b04fe"], "properties": ["a, sword, knife", "room, door, shelf"], "captions_pred_pc": ["a black silhouette of a knife on a white background", "above a black and white illustration of a person standing in front of a door"], "captions_pred_image": ["a 3d model of a samurai sword royalty free 3d model preview no 2", "a 3d model of an empty room"], "question": "which entity has a door?", "label": 1}, {"captions": [" a large, black and white circular 
building, resembling a stadium or ring structure.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["67f46bb0048244c687a58d1017a08f6b", "6b745457e06840119058883b35f78f58"], "properties": ["building, color, black and white", "roof, color, blue"], "captions_pred_pc": ["the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustr", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a circular fence with black and white stripes", "a 3d model of a house with a steeple on top"], "question": "which building has a blue roof", "label": 1}, {"captions": [" of a green man with arms outstretched, appearing to fly.", " a house with a green, 
wooden-structured roof."], "sample_ids": ["34850e40521940e49cfc27f0f486f544", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["color, shape, size", "roof, color, green"], "captions_pred_pc": ["a black and white illustration of an airplane propeller", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a 3d model of a man with his arms outstretched royalty free 3d model no.2", "a 3d model of a house with a triangular roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" a table with two bowls and a cup on it.", "white s of a wall-mounted light, toilet with handle, faucet, and lamp with a light bulb."], "sample_ids": ["41842c8d2ebd402da04def3c53c41633", "92052c493bf141a08b56f30f9c5e2d61"], "properties": ["a, bowl, cup", "light, toilet, faucet"], "captions_pred_pc": ["a black and white image of two eyes in the sky", "in 15 words or less a 3d illustration of an object made of dots on a white background 3d illustration of an object made of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a table on a marble floor royalty free 3d model preview no. 
3", "a white plastic toilet paper holder on a gray background"], "question": "which entity has a light", "label": 1}, {"captions": [" a black table with a metal shelf and folding door, featuring a hanging metal shelf from the ceiling.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["c7fdb8d8f32b415cb5d2a9b41dd7d77b", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["Black, Shelf, Metal", "house, fence, playground"], "captions_pred_pc": ["a black and white drawing of a spiral staircase", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of a black shelf with square tiles", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" a small house with a roof.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["9578e8de15ec44ce802072aaa4df3910", "06a1c233fb444830b577aa06e2c01294"], "properties": ["roof, house, small", "house, tree, hill"], "captions_pred_pc": ["above a black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small 
black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of a small house", "a black and white image of a house in the middle of a field"], "question": "which house is on a hill?", "label": 1}, {"captions": [" of a black shark", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["01b98721613b4c0ea23982c048955c1f", "c3a82df41875402285608ef13a55df57"], "properties": ["black, mouth, teeth", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of an object on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a black shark on a gray background royalty free 3d model preview no.2", "a white plastic object on a gray background"], "question": "which object is not a shark?", "label": 1}, {"captions": [" a house featuring a pink-purple roof with trusses and wooden ceiling beams.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["b6b6a3f82bdd47c3afaf9af885ba8703", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["roof, trusses, beams", "house, pool, balcony"], "captions_pred_pc": ["a black and white pattern of dots in the shape of a square royalty free illustration", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of a modern house"], "question": "which house has a pool", "label": 
1}, {"captions": ["smiley-faced banana .", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["cc0099a687194a31a052ac761f5fdfea", "c3a82df41875402285608ef13a55df57"], "properties": ["face is smiling, banana is yellow, smiley face is a sticker", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["above a black and white image of a surfboard on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a banana with a smiling face", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": ["a 3d object featuring a rock, shell, piece of paper, and cat.", " of a beige and white round soap dish/small bowl."], "sample_ids": ["53efab50e5a74e5ea165c763cea15be4", "5414d75e47104589837f3df8b6de6d22"], "properties": ["a, rock, paper", "beige, white, round"], "captions_pred_pc": ["for a flock of birds in the sky", "of a 3d model of a bracelet 3d model of a bracelet on a white background royalty free illustration 2019"], "captions_pred_image": ["a black and white image of a piece of paper in the shape of an island", "a white ceramic bowl sitting on top of a gray surface"], "question": "which object is round?", "label": 1}, {"captions": [" a small, snow-covered house.", " of a beige and white round soap dish/small bowl."], "sample_ids": ["0d00d10b90134dbe9ce7b2b3d6669237", "5414d75e47104589837f3df8b6de6d22"], "properties": ["house, snow, cover", "beige, white, round"], "captions_pred_pc": ["in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration", "of a 3d model of a bracelet 3d model of a bracelet on a white background royalty free illustration 2019"], "captions_pred_image": ["a piece of broken glass on a white background", "a white ceramic bowl sitting on top of a gray surface"], "question": "which object is round?", "label": 1}, {"captions": [" of a wooden 
cabinet with drawers and filing features.", " of a meat skewer with a small piece of bread and a sausage on a stick."], "sample_ids": ["abbc90bbd5474f73b16482ccd10e07ec", "1728f2cb8eca4080af02b22262ff45d5"], "properties": ["Cabinet, Drawers, Filing", "meat, bread, sausage"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white image of a brush on a white background"], "captions_pred_image": ["a 3d rendering of a white wooden chest of drawers", "an image of a small white object on a gray background"], "question": "which entity is a food?", "label": 1}, {"captions": [" a large metal building with a roof and truss structure.", " of a house with a roof truss, chimney, and suspended ceiling."], "sample_ids": ["b85a99699ccd4bcba213322113bb253d", "9401dfc901b2447a9c0eb27da56854d7"], "properties": ["roof, truss, structure", "roof truss, chimney, suspended ceiling"], "captions_pred_pc": ["of a metal grate on a white background", "in 15 words or less a black and white illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white 
background illustration of a"], "captions_pred_image": ["a 3d model of a long metal fence", "a 3d model of a house with a roof"], "question": "which entity has a roof truss", "label": 1}, {"captions": [" a white rocket ship.", " of a black tray with three green rings on it."], "sample_ids": ["22137b9fff744310ad3b4abe6d869718", "77d47814be1c4d1e8fd6207af2f4e095"], "properties": ["color, shape, size", "color, black, rings"], "captions_pred_pc": ["above a black and white illustration of a planet", "a black and white image of three dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a rocket ship royalty free 3d model preview no.1", "a 3d rendering of a tray with three rings on it"], "question": "which object is black?", "label": 1}, {"captions": ["a low poly of a plant on a white object, resembling a paper or plastic bag.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["d49d8ed244094349a99e4faca05e0690", "97e000ff41094665afd94ea565da8b13"], "properties": ["low poly, plant, white", "roof, material, wood"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a plant growing out of a rock", "a 3d model of the roof of a building"], "question": "which object is made of wood", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["room, bed, desk", "a room, a cake, a table"], "captions_pred_pc": ["a black and white drawing of a 
door", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a 3d rendering of a white room with various items in it"], "question": "which entity has a room with a cake?", "label": 1}, {"captions": [" of a white rectangular shelf or light fixture on a gray background.", " a white rocking chair with a curved backrest."], "sample_ids": ["a84221f27ed0416280f8e67563f95ed7", "ee0deb90abf943b6894cd5ded1331213"], "properties": ["background, color, white", "backrest, curved, yes"], "captions_pred_pc": ["a black line on a white background", "a black and white illustration of a spiral pattern on a white background a black and white illustration of a spiral pattern on a white background royalty free illustration"], "captions_pred_image": ["a long white plastic strip on a gray background", "a 3d model of a white chair royalty free 3d model no. 3"], "question": "which object has a curved backrest", "label": 1}, {"captions": ["a 3d object featuring a rock, shell, piece of paper, and cat.", " a green mountain with trees and grass."], "sample_ids": ["53efab50e5a74e5ea165c763cea15be4", "08fb23bdc67b4b0ba5fc64ea5c97e5f7"], "properties": ["a, rock, paper", "mountain, grass, tree"], "captions_pred_pc": ["for a flock of birds in the sky", "in 15 words or less a black ink blot on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a piece of paper in the shape of an island", "a 3d model of a mountain with snow on it"], "question": "which entity has more grass", "label": 1}, {"captions": [" a white and black horned demon with bunny features.", "a 3d white axe, hammer, and spoon."], "sample_ids": ["1d3537d1341a423f89990e9b06924904", "96d127abd21049689918e671ec613ef8"], "properties": ["color, horns, features", "axe, hammer, spoon"], "captions_pred_pc": ["above a black and white image of a pair of headphones", "of a black lace belt on a white background"], 
"captions_pred_image": ["a 3d model of a skull with horns on its head", "a 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe"], "question": "which object has a hammer?", "label": 1}, {"captions": [" of a multi-colored spooling machine with wires and circuit board, featuring a lamp and shelf with toys.", "a white of a man with arms outstretched."], "sample_ids": ["0e3f5cc16806492b948d41a748819ce3", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["color, shape, material", "image, color, white"], "captions_pred_pc": ["a black and white image of a decorative tile", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d rendering of an electronic device on a white surface", "a 3d model of a man with his arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": [" of a white plastic bar, resembling a shelf or towel rod, with a metal light fixture.", " a clay pot with holes in it."], "sample_ids": ["f84dec547a3d4710b48db11fc9fa489a", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["light source, fixture, color", "hole, material, clay"], "captions_pred_pc": ["a black and white photo of a small square on a white background", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], 
"captions_pred_image": ["a 3d rendering of a white object on a gray background", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": [" a small white building featuring a room with a door and a white shelf.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["d7b78fa9a6b64f6095b881bc619b04fe", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["room, door, shelf", "island, terrain, water"], "captions_pred_pc": ["above a black and white illustration of a person standing in front of a door", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of an empty room", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" of a white sculpture, resembling a horse and paper plane, on a gray background.", " a small house with a pond and situated on a rock."], "sample_ids": ["179b4438edfc4a43a27a83784f38ff4b", "92859eb82a344134806b37cc209927c6"], "properties": ["color, background, white", "house, rock, pond"], "captions_pred_pc": ["above a black and white image of a sculpture in the shape of a bird", "in 15 words or less a black and white drawing of a toaster"], "captions_pred_image": ["a 3d printed sculpture of a horse's head on a gray background", "a 3d model of a house in the middle of a field"], "question": "which entity is situated on a rock", "label": 1}, {"captions": ["a baseball card in a clear plastic case.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["5046b4bb61e4480a8c0e8911c2053a04", "c3a82df41875402285608ef13a55df57"], "properties": ["case, baseball, card", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["above a black and white image of a pencil on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a black and white photo of a 
hockey player's autographed jersey hanging on a wall", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" of a lantern on a table.", " a small house with a roof and door, resembling a shack or shed."], "sample_ids": ["500a64623ef1441db994bfc50e278ff6", "f1b557775310478893242180defa4d80"], "properties": ["a, table, lantern", "shack, roof, door"], "captions_pred_pc": ["a black and white illustration of a hexagonal shape made up of small dots on a white background a black and white illustration of a hexagonal shape made up of small dots on a white background royalty free illustration", "a black and white illustration of a telephone on a white background"], "captions_pred_image": ["a 3d model of a lantern on a table", "a 3d model of a small house in the middle of a field"], "question": "which entity is a shack?", "label": 1}, {"captions": [" of a multi-colored spooling machine with wires and circuit board, featuring a lamp and shelf with toys.", " a staircase with a railing, table, and chair, featuring a square ceiling light."], "sample_ids": ["0e3f5cc16806492b948d41a748819ce3", "51a0fba79bce472a8b827b78a110b77f"], "properties": ["color, shape, material", "stair, table, chair"], "captions_pred_pc": ["a black and white image of a decorative tile", "for a black and white image of a toilet paper holder"], "captions_pred_image": ["a 3d rendering of an electronic device on a white surface", "a 3d model of a staircase in a room"], "question": "which entity has a table and chair?", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["resembles, octopus, squid, spider, robot", "table, staircase, light"], "captions_pred_pc": ["a black and white illustration 
of a jellyfish", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a 3d model of a table with a staircase"], "question": "which entity has a light?", "label": 1}, {"captions": [" a purple chair with holes in it.", " a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole."], "sample_ids": ["833151c8e0f4489a9fa966635a948452", "b16fb21cda9a4a21a024df749c2304f4"], "properties": ["color, purple, holes", "roof, ceiling, hole"], "captions_pred_pc": ["of a silver pendant with an intricate design", "a black and white image of a square with dots on it"], "captions_pred_image": ["a 3d model of a white chair", "a 3d model of a small house and a tree in the foreground"], "question": "which entity has a roof and a square ceiling with a hole?", "label": 1}, {"captions": [" a robotic warrior with a sword.", " a table with pillar-like yellow and white poles, featuring a wooden structure, large tent, glass roof, and a chandelier with numerous glass tubes."], "sample_ids": ["1c54afa26eb24e19b8660066718a9c5a", "fa06167d83e54b05bdfbeeae2ca7c8a6"], "properties": ["weapon, sword, robot", "table, structure, roof"], "captions_pred_pc": ["a black and white image of a small white object on a black surface", "a black and white image of a map with dots"], "captions_pred_image": ["a black and white image of a robot standing on a piece of paper", "a 3d model of a building with many pillars"], "question": "which entity has a roof", "label": 1}, {"captions": [" a house with roof trusses and wooden beams on a suspended ceiling.", " a building with a purple, glass roof and a suspended ceiling featuring beams."], "sample_ids": ["3c2e3a3670b042069bd8290e2c357702", "a54c746bb2644e3ea4e53ee65e32df64"], "properties": ["roof trusses, beams, suspended ceiling", "roof, glass, purple, ceiling, beams"], "captions_pred_pc": ["above a 
black and white drawing of a building", "the letter 't' is made up of tiny white dots on a white background"], "captions_pred_image": ["a 3d model of a house with a roof in progress royalty free 3d model preview no. 1", "a 3d model of a building with a roof"], "question": "which building has a purple roof?", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "d81d13362ae04371bb2cba46e4939665"], "properties": ["apse, roof, floor plan", "hat, bow, arrow"], "captions_pred_pc": ["a black and white drawing of a room", "above a black and white photo of an ice sculpture"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a sculpture of an african man sitting on a pedestal"], "question": "which entity has a hat", "label": 1}, {"captions": [" of a purple laundry rack with multiple lines resembling wires.", " of earphones with wire sculpture, musical instrument, and hanging light fixture on a white shelf."], "sample_ids": ["1f0dce1431a842b8bdb24ac4dd05f386", "4ec4c815539f4f3f8cc2272e09f2aa41"], "properties": ["color, shape, material", "earphones, wire, instrument"], "captions_pred_pc": ["a black and white image of a metal structure", "of a curved metal rod on a white background"], "captions_pred_image": ["a 3d model of a wire rack on a white background", "a 3d model of a wire sculpture on a white surface royalty free 3d model preview no.3"], "question": "which entity is a wire sculpture?", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", " a wooden billiard table with legs."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "b20ad62516fa467ba6e8de063998e8e4"], "properties": ["texture, spikes, eyes", "legs, material, wood"], "captions_pred_pc": ["a black and white drawing of a flower", 
"a black and white drawing of a rectangular shaped object"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 3", "a black and white image of a pool table"], "question": "which entity has legs", "label": 1}, {"captions": ["white cylindrical 3d object resembling a roll of soap or tube of cream.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["d8d6903a2dd54b8e9aa12fad2155b372", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["cylinder, white, roll", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["in 15 words or less a black and white image of a dotted object on a white background", "a black and white image of a cone shaped object"], "captions_pred_image": ["a white pillow on a gray background", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a white bunny head", "star wars stormtrooper "], "sample_ids": ["260dfca84cc64848a6ea0fb74e0f4c92", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["color, white, head", "a, color, white"], "captions_pred_pc": ["a black and white illustration of a microscopic image of a cell", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a 3d white bunny head on a gray background", "a 3d model of a star wars stormtrooper"], "question": "which is not a white color", "label": 0}, {"captions": ["a low poly of a plant on a white object, resembling a paper or plastic bag.", " of a character wearing glasses and a hat."], "sample_ids": ["d49d8ed244094349a99e4faca05e0690", "032e7ce682ff43d5aa5ca4fd34eacf14"], "properties": ["low poly, plant, white", "hat, glasses, character"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration", "in 15 words or less a 
silhouette of a bell on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a plant growing out of a rock", "a 3d model of a person wearing sunglasses"], "question": "which entity is a character?", "label": 1}, {"captions": [" a bridge structure with a pier, stairway, railings, and red and green elements.", "star wars stormtrooper "], "sample_ids": ["23701a9674204ea881fb8af9914b1924", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["color, pier, railings", "a, color, white"], "captions_pred_pc": ["a black and white illustration of a computer keyboard", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a 3d model of a staircase in the middle of a field", "a 3d model of a star wars stormtrooper"], "question": "which entity is white", "label": 1}, {"captions": ["a featuring a pile of food, leaves, shredded paper, and rocks with scattered broken paper pieces.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["5206d4d96c2d428b9c1f7ee0e13bcffb", "3cd410c4359a4cef98702963a2b9802b"], "properties": ["food, leaves, shredded paper, rocks", "roof, trusses, ladder"], "captions_pred_pc": ["a black and white image of a bird in flight", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a 3d model of a mountain range on a white surface", "a 3d model of the roof of a building"], "question": "which entity is a building?", "label": 1}, {"captions": [" of a pillow featuring a hot dog shape and a bag of chips with a dragon design.", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken window."], "sample_ids": ["4ae6ac813d584d12a5d5d608a595bfe5", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["shape is hot dog, design is dragon, color is black", "room, furniture, window"], "captions_pred_pc": ["a black and white illustration of an eye with dots", "above a black and white drawing of a person sitting on a bench"], 
"captions_pred_image": ["a black and white photo of a pillow with a soda can on it", "a 3d image of a room with a person in it"], "question": "which entity is a room?", "label": 1}, {"captions": [" of a white building with a small house and a desk with a laptop.", " a large house with a roof on a platform."], "sample_ids": ["9244a2d3a9e94c8398ef991f1661bb58", "cb3e09a301b746918a682a595037c7f7"], "properties": ["a, desk, laptop", "roof, platform, house"], "captions_pred_pc": ["a black and white image of a piece of furniture", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of an office desk on a white background", "a 3d model of a small house"], "question": "which house has a roof on a platform?", "label": 1}, {"captions": ["a pixelated of a striped brown and white coffee mug.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["656fca269cb042e68b1fb5b629bfa873", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["mug, color, brown, white", "throne, stairs, tree"], "captions_pred_pc": ["a black and white illustration of a circle made up of many small dots", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a coffee cup with multiple layers", "a 3d model of a throne with a tree on it"], "question": "which entity has a fireplace?", "label": 1}, {"captions": [" a room featuring a wall with a painting, a hole, and a door.", " a house with a roof and beams."], "sample_ids": ["1d1328346a464d2482463d6d5288e934", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["painting, door, wall", "roof, beams, house"], "captions_pred_pc": ["in one hundred words or less an illustration of an igloo on a white background stock illustration", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], 
"captions_pred_image": ["a black and white photograph of a torn piece of paper in the shape of a bird", "a 3d model of a building with a roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a white rock formation with pebbles and ice elements.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["94d94f5a75de4bd0a43b08609630876e", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["color, shape, texture", "house, table, chair"], "captions_pred_pc": ["a black and white drawing of a pair of shoes on a white background royalty free illustration", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of a small rocky island in the middle of a body of water", "a 3d rendering of a small white table with a chair"], "question": "which entity is a building?", "label": 1}, {"captions": ["a white of a helmet and airplane wing.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["17f3bb7773ef4b2ea76134896b105fbf", "c3a82df41875402285608ef13a55df57"], "properties": ["color, helmet, airplane wing", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of a person's head with dots all over it", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a white helmet on a gray background", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": ["a featuring a phone booth, desk, chair, lamp, ladder, and two additional chairs.", " a large metal building with a roof and truss structure."], "sample_ids": ["7da804ad2b554c9a9915d775afb015d3", "b85a99699ccd4bcba213322113bb253d"], "properties": ["desk, chair, lamp", "roof, truss, structure"], "captions_pred_pc": ["a black and white illustration of a city skyline", "of a metal grate on a white background"], "captions_pred_image": ["a 3d 
rendering of a desk and chair in a room", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond.", " a stack of books, a pile of paper, and a lamp with a black and white shade."], "sample_ids": ["a452d5381dad4dc09f5ebe10635ae5fe", "6a06b505bcb34026a07ac15931f9f6f3"], "properties": ["house, roof, green", "books, paper, lamp"], "captions_pred_pc": ["above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white", "a black and white silhouette of a map of the state of new york"], "captions_pred_image": ["a 3d model of a building with a black roof", "a black and white photograph of a toilet paper holder"], "question": "which object is not a pile of paper?", "label": 1}, {"captions": [" a house with a roof, roof truss, and suspended ceiling structure.", " a house 
featuring a roof with truss system, framing, insulation, and a ceiling light."], "sample_ids": ["5abf69f79b92484fb54d41ff0c0a2c11", "39876e69e3914d99a07e0dc59611c5c0"], "properties": ["roof, truss, suspended ceiling", "roof, truss system, framing"], "captions_pred_pc": ["a suitcase made of dots on a white background a suitcase made of dots on a white background royalty free illustration", "a black and white drawing of a window with dots all over it"], "captions_pred_image": ["a 3d model of a house with roof trusses", "a 3d model of the roof of a house"], "question": "which entity has a roof with truss system", "label": 1}, {"captions": [" a red and blue metal-framed building structure with a steel house frame.", " a white plastic container with a lid, a small box, a cup, a bottle, and a jar."], "sample_ids": ["a453ac83ef1e4f76bd7978451888d5f5", "20a02705a66f460492e07345e84a62ed"], "properties": ["color, material, frame", "a box, a cup, a bottle, a jar"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "a black and white pattern of dots on a white background"], "captions_pred_image": ["a 3d model of a metal frame structure", "a 3d model of a plastic bottle, a plastic cap, and a plastic container"], "question": "which entity is not a building structure?", "label": 0}, {"captions": ["a 3d white ring adorned with flowers and leaves.", "\"multiple white cubes arranged in a row on a gray background.\""], "sample_ids": ["8219c3b38ad547268e1828b9c3a487f1", "17c8222d4ce04e518117078e7de6aaed"], "properties": ["color, shape, material", "color, background, white"], "captions_pred_pc": ["a heart made of black dots on a white background a heart made of black dots on a white background illustration", "a black and white image of a box with the words 'box 2' and 'thoughtful'"], "captions_pred_image": ["a 3d printed ring in white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 
3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white 3d printed white", "an image of a white background with a few small cubes on it"], "question": "which object is white", "label": 0}, {"captions": ["a small 3d purple teapot and elephant.", "a white glass beer mug."], "sample_ids": ["4a27592dc8164f709b44446ee11832c0", "1d686cbd3e9a4c629a43088658989286"], "properties": ["color, shape, material", "color, white, glass"], "captions_pred_pc": ["a black and white 3d illustration of an elephant's head on a white background 3d illustration of an elephant's head on a white background royalty free illustration", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a white ceramic teapot on a gray background", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": ["a white ceramic vase with the words 'happy' and 'fish' written on it.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["243cd2c469984313b1522dca099eefd3", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["color, white, material, ceramic", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["a black and white image of a vase in the shape of a fish", "a black and white image of a circular object on 
a white background"], "captions_pred_image": ["a white vase on a grey background", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": ["a collection featuring a furnished room, destroyed building, us map, house with roof, flying plane, and a ring with paper.", " a house with a roof and beams."], "sample_ids": ["f13d2d1d78cd49e78f3430abbb251edd", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["collection, room, destroyed, house, roof, plane, ring, paper", "roof, beams, house"], "captions_pred_pc": ["a black and white image of a person's face", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a black and white photograph of a person sitting on a couch", "a 3d model of a building with a roof"], "question": "which house has a roof and beams", "label": 1}, {"captions": [" a small house featuring a wooden floor, stairs, a bathroom with a sink and toilet, and a white box on a table.", " a house with a roof and beams."], "sample_ids": ["73f2780847f14547b9ae5f9e8a81e348", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["floor, stairs, bathroom", "roof, beams, house"], "captions_pred_pc": ["a black and white image of a leopard print pattern", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a small table", "a 3d model of a building with a roof"], "question": "which house has a roof and beams", "label": 1}, {"captions": [" a white table.", "a featuring a green frog face, bunny head, dragon head, flower, monster with a tail, and a monster mask with an open mouth."], "sample_ids": ["19a580515406462d9e73213276a8e12d", "127753bf17de4252aaa7ea88f274545e"], "properties": ["color, material, shape", "face, mask, tail"], "captions_pred_pc": ["a black and white image of a patterned surface", "a 3d model of an orchid flower on a white 
background 3d model of an orchid flower on a white background royalty free illustration"], "captions_pred_image": ["a white bench on a white background", "a 3d model of a goat's head"], "question": "which entity is a mask?", "label": 1}, {"captions": [" the white text \"ekberkaslan\" with a row of white cubes and a numbered box.", " a house with wooden framing and trusses."], "sample_ids": ["87ee30d475f34b799c24bf7ef3a7b540", "4501794e257c4a8ba60a94757d8e93a9"], "properties": ["- color is white- shape is cubes- number is 1", "frame, trusses, wood"], "captions_pred_pc": ["a close up of a black and white striped scarf", "a black and white drawing of a window"], "captions_pred_image": ["a 3d image of the word ebercaskalan on a white background", "a 3d model of a house under construction"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a featuring a sailboat with a hook, a bird suspended in the air, and a pair of scissors.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["f57ae66555d34349aeadc38b33f8f267", "06a1c233fb444830b577aa06e2c01294"], "properties": ["a, bird, hook", "house, tree, hill"], "captions_pred_pc": ["of a 3d scan of a person's torso and limbs", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a black and white photo of a kite flying in the sky", "a black and white image of a house in the middle of a field"], "question": "which entity has a house?", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " a flying bird, resembling a crow and a pigeon."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "5ec78c8b6ab54f739adb0b46d216a454"], "properties": ["ceiling, light, desks", "bird, resembles, crow, pigeon"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "above a black and white illustration of 
an airplane on a white background"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a black and white image of a bird in flight"], "question": "which entity is a bird?", "label": 1}, {"captions": [" a green cucumber wearing a white hat, tie, and white cream, accompanied by a green bottle with a white cap, a cartoon character in a green shirt, and a green and black bomb.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["0434eb3aa73e42a5a2570ec9c1a9b96b", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["green, hat, cream", "throne, stairs, tree"], "captions_pred_pc": ["a black and white image of a pair of earrings", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a vibrator in a grey and white color scheme royalty-free 3d model no.", "a 3d model of a throne with a tree on it"], "question": "which entity has a throne?", "label": 1}, {"captions": ["a featuring a fireplace, wooden bench, sled, log, castle, and cat.", " a house with a roof and beams."], "sample_ids": ["fe4cd6c3972940a5b7854ca1ebc8a5a3", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["fireplace, bench, log", "roof, beams, house"], "captions_pred_pc": ["a black and white illustration of a snowflake", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a fireplace with snow on the ground", "a 3d model of a building with a roof"], "question": "which entity has a roof?", "label": 1}, {"captions": [" a small house with a roof and ceiling-mounted air conditioner.", " a white building with a red roof."], "sample_ids": ["6965067ea9e34357a7af21a2f078fbac", "1f9580be397d4f948bf53fe1d5bc5756"], "properties": ["roof, air conditioner, house", "color, white, roof, red"], "captions_pred_pc": 
["a black and white illustration of a window", "in 15 words or fewer a black and white drawing of a camera"], "captions_pred_image": ["a 3d rendering of a small house with a covered porch", "a 3d model of an office building"], "question": "which building has a red roof", "label": 1}, {"captions": ["white s of a wall-mounted light, toilet with handle, faucet, and lamp with a light bulb.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["92052c493bf141a08b56f30f9c5e2d61", "06a1c233fb444830b577aa06e2c01294"], "properties": ["light, toilet, faucet", "house, tree, hill"], "captions_pred_pc": ["in 15 words or less a 3d illustration of an object made of dots on a white background 3d illustration of an object made of dots on a white background royalty free illustration", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a white plastic toilet paper holder on a gray background", "a black and white image of a house in the middle of a field"], "question": "which entity has more trees", "label": 1}, {"captions": [" a house with a pink roof, truss, and a square white ceiling lamp.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["91b2e9e4660946f5b4808a18b5323b69", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["roof, truss, lamp", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a house with a metal roof", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": ["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": 
["db19e46828c94b1a8b9a6ca9f673c604", "c3a82df41875402285608ef13a55df57"], "properties": ["a, roof, soup", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a bowl and chopsticks on a sheet of paper", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" orange and yellow fish, candy, and a pair of shoes.", "a gold ring featuring various animal designs, including a swan, snake, bird, and dragon, accompanied by a gold-plated frog sculpture."], "sample_ids": ["0fa2a605d7e940e5946f63c0f74234f3", "5e8319ec9a824ddcb3eef89658ef90f8"], "properties": ["color, shape, and material", "gold, ring, animal"], "captions_pred_pc": ["a black and white photograph of a group of plastic cups", "above a black and white image of a shark"], "captions_pred_image": ["a 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes", "a 3d sculpture of a bird with its wings 
outstretched"], "question": "which entity is made of gold", "label": 1}, {"captions": [" a blue circuit board with electronic components.", " a red \"welcome to northwich\" billboard on a wooden base."], "sample_ids": ["4816a2780af54492b6692fd78347f1ac", "225e4094141d416faba7c5598dc55ff8"], "properties": ["color, blue, components", "base material is wood, color is red, message is welcome to northwich"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench", "a black and white illustration of a circular object with many small dots on it"], "captions_pred_image": ["a 3d printed circuit board with various electronic components", "a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2"], "question": "which object is made of wood", "label": 1}, {"captions": ["green toy sand bucket and shovel with a squirt gun.", " a blue curved object, resembling a wall, box, shelf, and hat."], "sample_ids": ["ae173b4afc4d4b0499f1e4e55d647c06", "73e7107ff3634516846bccad04c5dc87"], "properties": ["color, green, squirt gun", "blue, curved, resembles"], "captions_pred_pc": ["a bucket with a sponge and a sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in", "a black and white illustration of a piece of jewelry"], "captions_pred_image": ["a 3d model of a bucket and shovel royalty free 3d model preview no.2", 
"a 3d rendering of a white bathroom sink"], "question": "which object is blue?", "label": 1}, {"captions": [" a set of yellow and black tools, including a magnifying glass, hexagonal keys, and screwdrivers.", " of a black tray with three green rings on it."], "sample_ids": ["40285a60f32749a8ae38957c7b073fe8", "77d47814be1c4d1e8fd6207af2f4e095"], "properties": ["color, yellow, black", "color, black, rings"], "captions_pred_pc": ["a set of three black and white keychains on a white background", "a black and white image of three dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a pair of hexagonal keys and a pair of hexagonal scissors royalty free 3d model preview no 3", "a 3d rendering of a tray with three rings on it"], "question": "which object is black", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " of a small white building or house with a white ceiling and kitchen hood."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "17b23d23309d4385938ced3ca536a1d1"], "properties": ["color, table, ceiling", "building, ceiling, kitchen"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "above a black and white drawing of a bathroom with a toilet and sink"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d model of a white building on a gray background"], "question": "which building has a kitchen?", "label": 1}, {"captions": [" a white and gold mirror on a wooden easel stand.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["0d10d734448d4a5d8d07b938c12d9d80", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, white, gold", "a, material, clay"], "captions_pred_pc": ["for a black and white image of a shoe on a white background", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a standing mirror on a white 
background", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": ["a featuring a small island with a mountain, a lake in the middle, and a bird flying above.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], "sample_ids": ["e51fead89ae64968b2ca7f4ccb6d3b97", "d81d13362ae04371bb2cba46e4939665"], "properties": ["a, bird, lake", "hat, bow, arrow"], "captions_pred_pc": ["a black and white photograph of a cloudy sky", "above a black and white photo of an ice sculpture"], "captions_pred_image": ["a 3d model of a mountain range in black and white", "a sculpture of an african man sitting on a pedestal"], "question": "which entity has a hat?", "label": 1}, {"captions": ["a 3d white box with an open door and lid.", " a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog."], "sample_ids": ["4e95f0eca97f48d6af1888a8bacec9f6", "f6c6e7a65a3e42dfa431b1d984c72f28"], "properties": ["- color is white - shape is box - material is plastic", "house, fence, dog"], "captions_pred_pc": ["a black and white square with dots all over it", "above a black and white drawing of a bathroom"], "captions_pred_image": ["a 3d rendering of a white box with an open lid", "a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model 
of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a small white wooden castle with flags, turrets, and two towers.", " a house with a roof and beams."], "sample_ids": ["345b1c7d2d9b4524aa0dcdc3cd27e4da", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["turrets, flags, towers", "roof, beams, house"], "captions_pred_pc": ["a black and white illustration of a square made up of many small dots on a white background", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a castle with towers and a drawbridge royalty-free 3d model preview no.1", "a 3d model of a building with a roof"], "question": "which entity has a roof and beams", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a white building with a red roof."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "1f9580be397d4f948bf53fe1d5bc5756"], "properties": ["color, shape, and size", "color, white, roof, red"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "in 15 words or fewer a black and white drawing of a camera"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of an office building"], "question": "which entity is a building?", "label": 1}, {"captions": [" a white box with colored buttons on it.", "a white of a woman with her arms outstretched."], "sample_ids": ["5a5269e17d134e238ec2b256405d8c10", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["color, shape, material", "image, color, white"], "captions_pred_pc": ["in 15 words or less a black and white patterned scarf on a white background royalty free illustration", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a 3d 
rendering of a white box with two buttons", "a 3d model of a woman with her arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": ["a white ceramic vase with the words 'happy' and 'fish' written on it.", " a snowy small village with farm buildings and a fence."], "sample_ids": ["243cd2c469984313b1522dca099eefd3", "6bb669534ccc434f9ab4d7b39bae3510"], "properties": ["color, white, material, ceramic", "building, fence, snowy"], "captions_pred_pc": ["a black and white image of a vase in the shape of a fish", "a black and white drawing of a boat on the water"], "captions_pred_image": ["a white vase on a grey background", "a 3d model of a small village in the snow royalty free 3d model preview no. 3"], "question": "which is not a building", "label": 1}, {"captions": [" of a white chair with arms and legs.", " of a wooden chair."], "sample_ids": ["61ec56afad7a45deb99ccf1ab1bd2d73", "b2fc4bd184944704ad43d3a31817a676"], "properties": ["Arms, Legs, Color", "wood, chair, seat"], "captions_pred_pc": ["of a 3d rendering of a chair in the style of the 1920s and 1930s", "a close-up view of the bristle brush on a white background"], "captions_pred_image": ["a 3d model of a white outdoor chair royalty free preview no 3", "a 3d model of a chair"], "question": "which chair has a seat made of wood", "label": 1}, {"captions": [" a red and blue metal-framed building structure with a steel house frame.", " a large white and metal building with a metal roof structure."], "sample_ids": ["a453ac83ef1e4f76bd7978451888d5f5", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["color, material, frame", "roof, metal, white"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a metal frame structure", "a 3d model of a large white box"], "question": "which building has a white roof", "label": 1}, {"captions": ["s of a cat, toilet, 
white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, and cow.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["7adf9de5fb734455a3a3a7f084e3d628", "4a889132cc444d10bfcbf6c760984416"], "properties": ["cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, cow", "a, color, white"], "captions_pred_pc": ["a black and white image of a flying saucer", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a black and white image of a fighter plane flying upside down", "a 3d model of a desk and chair"], "question": "which entity has a white bird with black eyes?", "label": 0}, {"captions": [" a white rock with green grass and moss on it.", "three white paper windmills and a city model with a nativity scene silhouette."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "fa5ee6165f31465d9d75d046818f4006"], "properties": ["color, grass, moss", "windmills, silhouette, city model"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "a black and white photo of a pair of sunglasses"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a 3d model of a cityscape on a white background"], "question": "which entity is a silhouette?", "label": 1}, {"captions": [" of a small house featuring a flat, brown roof, table with two chairs and a stool, ceiling light, and a window.", " of a barrel and a cube together."], "sample_ids": ["3a509431d96b43f8a7aebe2846f08b96", "f00b7661daf544b68cddf85d7d0308c7"], "properties": ["roof, brown, flat", "a, barrel, cube"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a black and white drawing of a snowflake on a white 
background royalty free illustration", "a black and white illustration of a 3d cube and a 3d sphere"], "captions_pred_image": ["a 3d rendering of a table and stool", "a 3d model of a barrel and a box next to each other royalty free 3d model preview no.3"], "question": "which object is not a barrel?", "label": 0}, {"captions": [" an old lantern with moss on it.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["62925f26fd624310bd6b31136fe8e706", "c3a82df41875402285608ef13a55df57"], "properties": ["moss, lantern, old", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["in 15 words or less the image is a silhouette of a person standing on top of a pyramid made of dots", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a black and white image of an old lantern", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" featuring a chair, table, and refrigerator.", " a large metal building with a roof and truss structure."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "b85a99699ccd4bcba213322113bb253d"], "properties": ["chair, table, refrigerator", "roof, truss, structure"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", "a yellow and blue toy submarine."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "da2719db8f4f4668af5b74c96e80c6cd"], "properties": ["color, material, structure", "color, yellow, blue"], "captions_pred_pc": ["a black and white drawing of a room with dots", "of a black and white image of a starfish on 
a white background"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d model of a submarine toy"], "question": "which entity is a toy?", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " of a small white building with stairs and a lid."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["bed, desk, window", "building, stairs, lid"], "captions_pred_pc": ["of a black and white drawing of a curved line", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d model of a white box on a gray background"], "question": "which entity has a lid?", "label": 1}, {"captions": [" a room featuring a wall with a painting, a hole, and a door.", " of a small white building with stairs and a lid."], "sample_ids": ["1d1328346a464d2482463d6d5288e934", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["painting, door, wall", "building, stairs, lid"], "captions_pred_pc": ["in one hundred words or less an illustration of an igloo on a white background stock illustration", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a black and white photograph of a torn piece of paper in the shape of a bird", "a 3d model of a white box on a gray background"], "question": "which entity has a lid?", "label": 1}, {"captions": [" a small house on an island with trees, shrubs, a pool, and a lake.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["c8331489fca44685bedfa1bdadf6ccb3", "bf18bfd89efd43389781050230467d58"], "properties": ["house, lake, pool", "Lights, number, five"], "captions_pred_pc": ["a black and white image of a pattern on a piece of paper", "a black and white illustration of a snowflake on a 
white background royalty free illustration"], "captions_pred_image": ["a 3d model of a large building", "a white chandelier with five white shades"], "question": "which entity has fewer lights", "label": 1}, {"captions": [" a futuristic space station featuring a bench, computer desk with a laptop, small coffee machine, printer, and computer monitor.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["9b8e2f9070b24956a343a01a5fabdf03", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["computer desk, laptop, monitor", "lizard, rock, stuffed animal"], "captions_pred_pc": ["a black and white silhouette of a traditional japanese gate", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a futuristic room with a bench", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": ["a white of a helmet and airplane wing.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["17f3bb7773ef4b2ea76134896b105fbf", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["color, helmet, airplane wing", "box, handle, gun"], "captions_pred_pc": ["a black and white image of a person's head with dots all over it", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d model of a white helmet on a gray background", "a 3d rendering of a metal box with a handle"], "question": "which object has a handle", "label": 1}, {"captions": [" a small white building with a floor plan and ceiling light.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["1df55bb7035941cc9829aa904e2af065", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["floor plan, ceiling light, color", "camera, speaker, ceiling 
fan"], "captions_pred_pc": ["a line of dots on a white background a line of dots on a white background royalty free illustration", "for a black and white image of an object on a white background"], "captions_pred_image": ["a white 3d model of a house", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a ceiling fan", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a blue and white sphere on a pedestal, with a blue and gold pillar and statue featuring a blue and white crown."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "e34088fc7b5344c4ab29fef067750225"], "properties": ["color, shape, and size", "sphere, pedestal, statue"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "the image is a black and white illustration of a toilet bowl with a toilet seat on top of it the toilet bowl is made up of tiny dots and the toilet seat is also made up of tiny dots royalty free illustration"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a sculpture on top of a pedestal"], "question": "which entity has a sphere on a pedestal?", "label": 1}, {"captions": ["a 3d blue star featuring various text, including \"prchen,\" \"bible chen,\" \"rib chicken,\" and \"birch chen.\"", " a wooden shed with a gray roof."], "sample_ids": ["0b712fc68ad44ce8a33592d2b26aac18", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["color, shape, text", "roof, color, gray"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a white paper airplane flying over a gray background", "a 3d model of a shed with a gray roof"], "question": "which entity has a roof that is gray", "label": 
1}, {"captions": ["s of a cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, and cow.", " a small triangular-shaped object."], "sample_ids": ["7adf9de5fb734455a3a3a7f084e3d628", "2d02985030804209a26c2c53b96a06f9"], "properties": ["cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, cow", "shape, triangle, small"], "captions_pred_pc": ["a black and white image of a flying saucer", "a black and white image of a piece of metal"], "captions_pred_image": ["a black and white image of a fighter plane flying upside down", "a black piece of furniture on a white background"], "question": "which object is a triangle?", "label": 1}, {"captions": [" of a red and white pokeball with the word \"pichu\" on it, compatible with various modeling and animation software.", " of a stone wall with a window and multiple stone arches."], "sample_ids": ["7d76c44cb9dd4948b8766d83994ca5f3", "db74ee1621464be1b164be26a1af050e"], "properties": ["- material is stl, obj, fbx- size is 240px- color is red, white", "window, arches, wall"], "captions_pred_pc": ["a black and white circular pattern on a white background", "a black and white illustration of a bolt and nut on a white background a black and white illustration of a bolt and nut on a white background royalty free illustration"], "captions_pred_image": ["a close-up view of a black ball with a screw in the center", "a 3d model of an old brick wall"], "question": "which entity has a window?", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a building featuring yellow columns, a yellow roof, and a wooden structure."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "0ce6a4102f4f40e2a0084938b0a93941"], "properties": ["roof, green, lawn", "structure, columns, roof"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "a black and white drawing of a window"], "captions_pred_image": 
["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of a building with multiple levels"], "question": "which building has a wooden structure?", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " of a barrel and a cube together."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "f00b7661daf544b68cddf85d7d0308c7"], "properties": ["ceiling, light, desks", "a, barrel, cube"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "a black and white illustration of a 3d cube and a 3d sphere"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d model of a barrel and a box next to each other royalty free 3d model preview no.3"], "question": "which object is not a barrel?", "label": 0}, {"captions": [" a pink-framed building structure with beams and trusses.", " a building with a metal and wooden pole structure."], "sample_ids": ["18e392c5360146eda498c5edab25b15c", "e2e2ab4474b84f33809979da457eedd9"], "properties": ["frame, beams, trusses", "structure, material, pole"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white illustration of a line of dots on a white background"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d model of a structure with multiple tables and chairs"], "question": "which structure is made of metal and wooden poles", "label": 1}, {"captions": [" a house with roof trusses and wooden beams on a suspended ceiling.", " of two rocks with ice elements."], "sample_ids": ["3c2e3a3670b042069bd8290e2c357702", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["roof trusses, beams, suspended ceiling", "image is a rock with ice elements"], "captions_pred_pc": ["above a black and white drawing of a building", "a black and white image of two rocks on a white background"], 
"captions_pred_image": ["a 3d model of a house with a roof in progress royalty free 3d model preview no. 1", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a small yellow table with a staircase and a square ceiling light fixture.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["36f4d2cbd02345c6a77f7345ebde841c", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["table, staircase, light", "house, pool, balcony"], "captions_pred_pc": ["a black and white photo of the letter g", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a table with stairs", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": ["a 3d white axe, hammer, and spoon.", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken window."], "sample_ids": ["96d127abd21049689918e671ec613ef8", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["axe, hammer, spoon", "room, furniture, window"], "captions_pred_pc": ["of a black lace belt on a white background", "above a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe", "a 3d image of a room with a person in it"], "question": "which entity is a room?", "label": 1}, {"captions": ["a white 
teddy bear, cloud, skull, octopus, and bird in various positions on a gray background.", " a white building."], "sample_ids": ["dd5849aced0443b1b4b38d413f7e06c4", "4decc6a3d12c47888f8fec543153a985"], "properties": ["background, color, white", "color, white, building"], "captions_pred_pc": ["a black and white image of a cat's head", "a black and white drawing of a bottle"], "captions_pred_image": ["a 3d model of an animal skull in white on a gray background", "a 3d model of a building on a white background"], "question": "which building is white", "label": 1}, {"captions": [" of a green bush with tree-like leaves.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["cb91cb6149a142a8a196a268dcf36aa8", "bf18bfd89efd43389781050230467d58"], "properties": ["leaf, color, shape", "Lights, number, five"], "captions_pred_pc": ["a black and white dots on a white background a black and white dots on a white background stock illustration", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 360 degree view of a bushy plant on a gray background royalty free 360 degree view of a bushy plant on a gray background", "a white chandelier with five white shades"], "question": "which entity has more lights", "label": 1}, {"captions": [" a brick building with a roof structure and roof truss.", " a diverse town featuring houses, buildings, people, animals, and desert elements."], "sample_ids": ["84e8acad28664a738df69d719df9e263", "436d6492fa06466680ecc82e5e07a7a0"], "properties": ["roof, structure, truss", "house, building, people"], "captions_pred_pc": ["a black and white polka dots pattern on a white background polka dots pattern on a white background illustration", "a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a brick building with a roof", "a 3d model of a small town in the middle of a field"], "question": "which entity has 
more buildings", "label": 1}, {"captions": ["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["db19e46828c94b1a8b9a6ca9f673c604", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["a, roof, soup", "box, handle, gun"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d model of a bowl and chopsticks on a sheet of paper", "a 3d rendering of a metal box with a handle"], "question": "which entity has a handle?", "label": 1}, {"captions": [" a large metal building with a roof and truss structure.", " a multicolored metal building structure with a roof."], "sample_ids": ["b85a99699ccd4bcba213322113bb253d", "22483891fd124baca3bbc6a6a49adc9c"], "properties": ["roof, truss, structure", "color, roof, structure"], "captions_pred_pc": ["of a metal grate on a white background", "of a black and white photo of a bike on a white background"], "captions_pred_image": ["a 3d model of a long metal fence", "a 3d model of a barn structure"], "question": "which building has a roof", "label": 1}, {"captions": [" a large house/building structure with a roof.", " a wooden house with a roof and framing."], "sample_ids": ["82859e4c6d4e4bbea94b6252bef1d398", "4634a9bdf54549a99f68be77f1464b0a"], "properties": ["roof, structure, house", "roof, framing, material"], "captions_pred_pc": ["a black and white photograph of a metal sculpture", "a black and white drawing of an abstract pattern"], "captions_pred_image": ["a 3d model of a large white structure", "a 3d model of a barn structure"], "question": "which house has a roof made of wood", "label": 1}, {"captions": [" of a house with a roof truss, chimney, and suspended ceiling.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": 
["9401dfc901b2447a9c0eb27da56854d7", "c3a82df41875402285608ef13a55df57"], "properties": ["roof truss, chimney, suspended ceiling", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a house with a roof", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a set of yellow and black tools, including a magnifying glass, hexagonal keys, and screwdrivers.", " a large metal building with a roof and truss structure."], "sample_ids": ["40285a60f32749a8ae38957c7b073fe8", "b85a99699ccd4bcba213322113bb253d"], "properties": ["color, yellow, black", "roof, truss, structure"], "captions_pred_pc": ["a set of three black and white keychains on a white background", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a pair of hexagonal keys and a pair 
of hexagonal scissors royalty free 3d model preview no 3", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a medieval stone castle with walls and stairs.", "a 3d white cube with windows resembling a building."], "sample_ids": ["10bc1bbec4c045f9b15fc2156b5e32ee", "4a07a5293f024bb0a353954a056ef626"], "properties": ["wall, stairs, castle", "- material is white- color is white- texture is textured"], "captions_pred_pc": ["a castle in the snow a castle in the snow illustration on a white background royalty free illustration", "a black and white image of a square made up of small dots"], "captions_pred_image": ["a 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the", "a 3d model of a cube"], "question": "which entity is not a building?", "label": 0}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", "a white of a spaceship and building."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "bf7d4277c9184d35abdec85bd5e25956"], "properties": ["color, light, jewels", "image, building, spaceship"], "captions_pred_pc": ["in 15 words or less a black and white image of a 
snowflake on a white background royalty free illustration", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a 3d model of a white object on a gray background"], "question": "which image shows a spaceship and building?", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a robot with a blue, purple, and white body."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "6f98acb9e03c4cbd9c83f2c8f9cd3ddc"], "properties": ["color, white, black, white", "body, color, white"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "above a black and white image of a robot"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of a robot standing in the middle of a white background"], "question": "which object has a white body", "label": 1}, {"captions": [" of a white sculpture, resembling a horse and paper plane, on a gray background.", " of a sword with a wooden handle."], "sample_ids": ["179b4438edfc4a43a27a83784f38ff4b", "bf448dbb4b6a43d89b2514929e8f7c43"], "properties": ["color, background, white", "handle, material, wood"], "captions_pred_pc": ["above a black and white image of a sculpture in the shape of a bird", "a black and white image of a sword on a white background"], "captions_pred_image": ["a 3d printed sculpture of a horse's head on a gray background", "a black and white image of a sword with two blades"], "question": "which object has a wooden handle", "label": 1}, {"captions": [" a large, multi-floor building with columns, shelves, conveyor table, and a ceiling structure featuring pipes.", " a wooden shed with a gray roof."], "sample_ids": ["6d773d2b0ed9437ea2b9b352bd8a5c25", 
"8b32e1ded62144768cd9ca8945fa8524"], "properties": ["building, floor, columns", "roof, color, gray"], "captions_pred_pc": ["in one line a black and white drawing of a dotted pattern on a white background", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a large white table with multiple shelves", "a 3d model of a shed with a gray roof"], "question": "which building has a roof that is gray", "label": 1}, {"captions": ["a small white wooden castle with flags, turrets, and two towers.", " of a small white building with stairs and a lid."], "sample_ids": ["345b1c7d2d9b4524aa0dcdc3cd27e4da", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["turrets, flags, towers", "building, stairs, lid"], "captions_pred_pc": ["a black and white illustration of a square made up of many small dots on a white background", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a castle with towers and a drawbridge royalty-free 3d model preview no.1", "a 3d model of a white box on a gray background"], "question": "which entity has a lid?", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a small white house with stairs and a spiral staircase, featuring a white table and ceiling light."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "e9e1cc7fae22458197a61f43a9c355f4"], "properties": ["roof truss, insulation, suspended ceiling", "house, staircase, table"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "above a black and white photograph of a dog in a frame"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a small house with a spiral staircase"], "question": "which 
house has a staircase?", "label": 1}, {"captions": ["a featuring white and red cubes, and a pink and white chair.", " a house with a green, wooden-structured roof."], "sample_ids": ["f2c44a82ba744ba8b93e9a1c2272c117", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["color, white, red, pink", "roof, color, green"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a 3d model of a white structure with stairs", "a 3d model of a house with a triangular roof"], "question": "which entity has a roof that is the color of green?", "label": 1}, {"captions": [" a small house with a staircase, balcony, and wooden floor.", "a white of a man with arms outstretched."], "sample_ids": ["e67e211004cb450cbaf8139dd74ba39b", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["floor, staircase, balcony", "image, color, white"], "captions_pred_pc": ["a black and white drawing of a wallet", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d model of a bench on a wooden floor", "a 3d model of a man with his arms outstretched"], "question": "which image is white", "label": 1}, {"captions": [" a small house with a yellow roof and chimney.", " a white building with a red roof."], "sample_ids": ["0056e85a243b47a08ddbcd36816cb6ae", "1f9580be397d4f948bf53fe1d5bc5756"], "properties": ["roof, yellow, chimney", "color, white, roof, red"], "captions_pred_pc": ["a black and white illustration of a house made up of dots on a white background a black and white illustration of a house made up of dots on a white background royalty free illustration", "in 15 words or fewer a black and white drawing of a camera"], "captions_pred_image": ["a 3d model of a small house royalty-free 3d model preview no.2", "a 3d model of an office building"], "question": "which building has a red roof", "label": 1}, {"captions": ["a red toy 
robot cowboy with a hat and glasses.", " a mouse wearing a top hat and a teddy bear holding a spoon."], "sample_ids": ["83a910fb2e714b7082fb7606fce83dc4", "887e410d07854396b563325ae1929583"], "properties": ["color, hat, glasses", "hat, mouse, bear"], "captions_pred_pc": ["a black and white image of a brake pad", "a black and white illustration of a snowflake on a white background"], "captions_pred_image": ["a 3d model of a cowboy hat on top of a vending machine royalty free 3d model no.2", "a 3d model of a mouse wearing a top hat and bow tie"], "question": "which entity has a hat", "label": 1}, {"captions": [" a red and blue metal-framed building structure with a steel house frame.", " a house with a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["a453ac83ef1e4f76bd7978451888d5f5", "c8936ace72954650b4e2d84246964849"], "properties": ["color, material, frame", "roof, color, pink"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "a black and white drawing of a toilet"], "captions_pred_image": ["a 3d model of a metal frame structure", "a 3d model of a house with a roof"], "question": "which house has a pink roof", "label": 1}, {"captions": ["a featuring a large flying ship, a mountain range with a central lake, and a small island resembling hawaii.", " of two rocks with ice elements."], "sample_ids": ["4d613d2057454e719bcae7f8cf05210a", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["a, island, resembles, hawaii", "image is a rock with ice elements"], "captions_pred_pc": ["above a black and white image of a person's hand holding a pencil", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in 
black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a", "a 3d image of two rocks on a gray surface"], "question": "which image shows a rock with ice elements?", "label": 1}, {"captions": [" of a mushroom cloud with white tree and coral plant elements, featuring blue and green leaves.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["bca5233d878e4cf09b5bc2bb6f3915b0", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["color, shape, texture", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white image of a square with dots on it", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d model of a tree made out of white flowers", "a 3d model of a woman's chest"], "question": "which entity has a pattern", "label": 1}, {"captions": [" a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light.", "a featuring a room with a staircase, a damaged bus and rv, two houses, a large white box, and destroyed buildings with debris."], "sample_ids": ["5ea0962b100b4fccb761ed84afe027b5", "a47dcf3d3cf34c58af17b6715d6f1232"], "properties": ["house, table, chair", "room, staircase, bus"], "captions_pred_pc": ["above a black and white photograph of an open door", "a black and white drawing of a room with 
dots"], "captions_pred_image": ["a 3d rendering of a small white table with a chair", "a 3d image of a building with a lot of debris"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" of a white wall-mounted light switch, electrical outlet, and various furniture pieces.", " a group of ponies in a row."], "sample_ids": ["b195bf7ba6094e1b812e4312deeeb360", "e2c00fdbc5bd40bba2c41b62520a58e9"], "properties": ["light switch, electrical outlet, furniture", "group, row, pony"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "a black and white drawing of an octopus on a white background"], "captions_pred_image": ["a 3d rendering of a room with a white background", "a 3d model of my little pony"], "question": "which entity is a group of ponies?", "label": 1}, {"captions": ["a 3d object featuring elements of a green and purple lamp, a cartoon character with a light bulb, an ice cream cone, a syringe, a light pole, a pen with a face, and a doctor's stethoscope.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["42d3657ce7bc4b5dbeb7f444e089a715", "bf18bfd89efd43389781050230467d58"], "properties": ["a lamp, a syringe, a light pole", "Lights, number, five"], "captions_pred_pc": ["a spoon with black dots on a white background spoon with black dots on a white background, 3d illustration royalty free stock photo", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a 
lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop", "a white chandelier with five white shades"], "question": "which entity has more lights", "label": 1}, {"captions": [" a small house with a tree and a rock.", "a featuring a skeleton, torn paper, long stick, rock, and broken wood."], "sample_ids": ["9dc392a7f6e444e5bfb720684d6f864a", "46903bf029934b1989bc062dcb0a5531"], "properties": ["house, tree, rock", "skeleton, torn, paper, long stick, rock, broken wood"], "captions_pred_pc": ["in 15 words or less the image depicts a white square on a white background with black dots all over the square stock illustration", "a close up of a black object on a white background"], "captions_pred_image": ["a 3d model of a small house with a tree in front of it", "a 3d sculpture of a person's hand in the air royalty-free 3d model preview"], "question": "which entity has a skeleton?", "label": 1}, {"captions": [" a small building with a roof.", " a house with a wooden-framed roof structure."], "sample_ids": ["a3089017e4c5463d852de65abf61eda5", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["roof, building, small", "roof, material, wood"], "captions_pred_pc": ["in 15 words or less a black and white image of a building with dots all over it", "a black and white drawing of a staircase"], "captions_pred_image": ["a black and white image of a small house", "a 3d model of a building with a roof"], "question": "which building has a roof made of wood", "label": 1}, {"captions": ["a featuring a white and blue structure with a table, blue blocks, and suspended blue cubes.", " a small building with a school, house, soccer field, and white ceiling light, featuring a white and green paper clip."], "sample_ids": 
["1d2cfe3a03004b62b17d3ce065658302", "97f487941d26472294e005fa97c403be"], "properties": ["color, table, blocks", "color, white, green"], "captions_pred_pc": ["a group of people standing on top of each other on a white background a group of people standing on top of each other on a white background royalty free illustration", "for a black and white drawing of a computer keyboard"], "captions_pred_image": ["a 3d model of a table with four legs", "a 3d model of an office building"], "question": "which entity has a white and green paper clip?", "label": 1}, {"captions": [" a staircase with a railing, table, and chair, featuring a square ceiling light.", "a featuring a staircase, small red and white building, red box, table, and ceiling light."], "sample_ids": ["51a0fba79bce472a8b827b78a110b77f", "11e2e8ca1f8849e394dfbf532c6d7ae0"], "properties": ["stair, table, chair", "a, building, staircase"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "for a black and white photograph of a piece of metal"], "captions_pred_image": ["a 3d model of a staircase in a room", "a 3d model of a staircase in the middle of a floor"], "question": "which staircase is in a building?", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["resembles, toy, bookshelf", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white image of a book cover", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" of a loaf of bread and a piece of 
chocolate cake.", " a small village featuring houses, trees, and a winding road."], "sample_ids": ["3a6cda16adee41ebbe3cbb8c6cdbf464", "7acf46c0265d4e39b97ac084852abde8"], "properties": ["bread, chocolate, cake", "houses, trees, road"], "captions_pred_pc": ["a black and white image of a sponge on a white background sponge on a white background royalty free illustration", "in 15 words or less a black and white photo of a mountain landscape"], "captions_pred_image": ["a black and white image of a loaf of bread", "a black and white photograph of a small town"], "question": "which entity has more trees", "label": 1}, {"captions": ["a 3d white box with black trim, stripes, and handles.", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["55b26130f1514032be078e13fd982905", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["color, shape, material", "box, paper clip, lock"], "captions_pred_pc": ["a black and white drawing of a square made up of dots", "above a black and white image of a clock in the shape of a spiral"], "captions_pred_image": ["a 3d model of a white box with a black handle", "a 3d model of a stapler with a staple in it royalty free 3d model no."], "question": "which box is white", "label": 1}, {"captions": [" of a green pickle", " a clear glass table with metal legs and balls on top."], "sample_ids": ["4c7904175d6d4676b73866c25110e8d3", "7c2bfa826f274377ac21f48d510848c3"], "properties": ["color is green, shape is cylinder, material is plastic", "glass, metal, balls"], "captions_pred_pc": ["of a black object on a white background", "a black and white image of a wine glass"], "captions_pred_image": ["a 3d model of a peanut on a white background royalty free 3d model preview no.3", "a clear acrylic foosball table"], "question": "which object is made of glass", "label": 1}, {"captions": [" a white and silver human torso sculpture with rocks.", " of two rocks with ice elements."], "sample_ids": 
["3978258c3f26401681c6e44b404e2cca", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["color, material, texture", "image is a rock with ice elements"], "captions_pred_pc": ["above a black and white drawing of a skull", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d sculpture of a rock formation on a white background", "a 3d image of two rocks on a gray surface"], "question": "which entity is a rock?", "label": 1}, {"captions": ["a white of a helmet and airplane wing.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["17f3bb7773ef4b2ea76134896b105fbf", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["color, helmet, airplane wing", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white image of a person's head with dots all over it", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d model of a white helmet on a gray background", "a 3d model of a woman's chest"], "question": "which entity has a teddy bear?", "label": 1}, {"captions": ["a 3d white object featuring stacked racks, toy train, blocks, plastic pipes, lego pieces, and clothes hangers.", "a 3d object featuring a white tray with a decorative pattern, a silver tray with a bird, a laptop, a long knife, and a metal bar."], "sample_ids": ["c9c786f133f54d5e8d99bfc1a588df41", "b1099ba41d9f4af19d1a91761bb6074c"], "properties": ["stacked, racks, toy train", "Object, Tray, Tray"], "captions_pred_pc": ["a black and white photo of a person standing in front of a white wall", "above a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a ship's propeller on a white background", "a 3d image of a white tray with an intricate design"], "question": "which object is a tray?", "label": 1}, {"captions": [" a multicolored metal building structure with a roof.", " a house with a wooden-framed roof structure."], 
"sample_ids": ["22483891fd124baca3bbc6a6a49adc9c", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["color, roof, structure", "roof, material, wood"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a barn structure", "a 3d model of a building with a roof"], "question": "which structure has a roof made of wood", "label": 1}, {"captions": [" orange and yellow fish, candy, and a pair of shoes.", "a 3d object featuring a white tray with a decorative pattern, a silver tray with a bird, a laptop, a long knife, and a metal bar."], "sample_ids": ["0fa2a605d7e940e5946f63c0f74234f3", "b1099ba41d9f4af19d1a91761bb6074c"], "properties": ["color, shape, and material", "Object, Tray, Tray"], "captions_pred_pc": ["a black and white photograph of a group of plastic cups", "above a black and white image of a piece of paper"], "captions_pred_image": ["a 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes on a white background 3d rendering of a pair of shoes", "a 3d image of a white tray with an intricate design"], "question": "which 
object is made of metal", "label": 1}, {"captions": ["a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen.", "3d snowman model with a wooden stick."], "sample_ids": ["c9b1c89380e947f58aa06eb56c93c6d8", "44f41039246a4df59027c38023d5a576"], "properties": ["- color is black and gold- shape is cylindrical- material is metal", "- material is wood - color is white - shape is 3d"], "captions_pred_pc": ["a black and white image of a circular object on a white background", "a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a"], "captions_pred_image": ["a 3d model of a black and white object on a gray background", "a 3d snowman royalty-free 3d model preview"], "question": "which object is made of wood", "label": 1}, {"captions": ["a 3d white cube featuring a hole, wheels, and a diamond.", " a large white and metal building with a metal roof structure."], "sample_ids": ["e44009d33258425e8efedfbc6823bf70", 
"0ee2683270b1486991f9b9ef12990a78"], "properties": ["- color is white- shape is cube- material is plastic", "roof, metal, white"], "captions_pred_pc": ["for a black and white image of a toothbrush in the shape of a toothbrush", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a white cube", "a 3d model of a large white box"], "question": "which is not a building", "label": 1}, {"captions": [" a pyramid with blue and pink lines, wires, and mesh.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["a3b2db8d5c6044f88b275839d0cd71bd", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["color, shape, material", "color is white, yellow, plastic"], "captions_pred_pc": ["a black and white image of a patterned rug", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a 3d model of the pyramid roof royalty-free 3d model preview no.1", "a white plastic container with a label on it"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a house with a roof structure and toothbrushes.", " a house with a roof structure, featuring a brick wall and suspended box."], "sample_ids": ["7632d1ba4e8144c19484c263b6074d0c", "1a7bfcf3755142bab90d3d7cb02d0f2c"], "properties": ["house, roof, toothbrushes", "roof, structure, wall"], "captions_pred_pc": ["a black and white illustration of the letter 'b' isolated on a white background illustration", "a black and white illustration of a group of dots on a white background"], "captions_pred_image": ["a 3d rendering of a white box with a lot of blades", "a 3d model of a building with a roof"], "question": "which house has a brick wall?", "label": 1}, {"captions": [" a small robot with green and yellow parts, a hat, and a green handle, accompanied by a person riding on a green stool.", " a small blue robot with a windmill."], "sample_ids": ["c927a45ebfd847e7a31b54cd2bb76b06", 
"5f915b161cc74fedad52d8663b41f87a"], "properties": ["color, hat, handle", "color, blue, robot"], "captions_pred_pc": ["above a black and white illustration of a toy robot", "a black and white image of a 3d sculpture"], "captions_pred_image": ["a 3d model of a person standing on top of a table", "a 3d model of a wind turbine"], "question": "which robot is blue?", "label": 1}, {"captions": [" a small house with a red roof.", "red swivel chair with a white base ."], "sample_ids": ["085db9059b744673b5623b5338e02196", "7b78fb47a2684906bcc22ac6e848999a"], "properties": ["roof, red, house", "color, red, base, white"], "captions_pred_pc": ["a black and white dotted square on a white background", "a black and white image of a sphere made up of many small dots on a white background"], "captions_pred_image": ["a 3d model of a small shed in the snow", "a 3d model of a modern chair"], "question": "which object has a white base?", "label": 1}, {"captions": ["white pendant light fixture ()", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["a7ce6d03c06d4c32bb507d7f1ee3c971", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["color is white, material is metal, light source is incandescent", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["in 15 words or less a black and white illustration of the letter 'p'", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a white hanging light fixture with a white shade", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a red circle, red and white arrow, mouse, and child's handwriting.", " a pink-framed building structure with beams and trusses."], "sample_ids": ["3c233f87bf264968a7f0660b9eac9e4a", "18e392c5360146eda498c5edab25b15c"], "properties": ["red, mouse, handwriting", "frame, beams, 
trusses"], "captions_pred_pc": ["in 15 words or less a black and white image of a person's hand holding a pencil and drawing on a piece of paper", "a black and white drawing of a metal grate"], "captions_pred_image": ["a black and white drawing of a person's hand holding a pencil", "a 3d model of a building under construction"], "question": "which entity has a frame?", "label": 1}, {"captions": [" a small house with stairs and a balcony.", " a small wooden house."], "sample_ids": ["0fbc5f16d301450c820b1f2158fd4f69", "4cb4dba1237443eb8dc299530fa12521"], "properties": ["balcony, stairs, house", "house, material, wood"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots"], "captions_pred_image": ["a 3d model of a building with two floors and a balcony", "a 3d model of a small cottage"], "question": "which house is made of wood", 
"label": 1}, {"captions": [" of a plague mask with a rusty, horned, wooden helmet and a crow's head design.", " a white castle composed of small cubes."], "sample_ids": ["2b0896f810074399a5ae7d6dbab8c330", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["- material is wood, rusty, horned", "composed of, white, cubes"], "captions_pred_pc": ["in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["3d model of a plague doctor's mask", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of small cubes", "label": 1}, {"captions": [" a white and black chair with a black handle and armrest.", " a pink-framed building structure with beams and trusses."], "sample_ids": ["cf1f435c54b046f68d6603cd3369a94f", "18e392c5360146eda498c5edab25b15c"], "properties": ["color, black, white, handle, armrest", "frame, beams, trusses"], "captions_pred_pc": ["a black and white drawing of a geometric shape", "a black and white drawing of a metal grate"], "captions_pred_image": ["a 3d rendering of a white chair with a black handle", "a 3d model of a building under construction"], "question": "which entity has a frame?", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " a spiral staircase with a railing in a small building."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "28cae056856c4a8ba9d1a6af5355f831"], "properties": ["ceiling, light, desks", "staircase, railing, building"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "a black and white photograph of a light switch"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d model of a staircase in a white room"], "question": "which entity has a railing", "label": 1}, 
{"captions": ["a 3d object collection featuring a long stick, a grassy hill, a fish, a green and yellow knife, a small green and purple fish, a bird, and a crocodile.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["856e5f6a854d4c608901f1e2b580344c", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["a, b, c", "house, fence, playground"], "captions_pred_pc": ["above a black and white drawing of a submarine", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a black and white photograph of a fish on a gray background", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" a white hospital operating room with blue containers and medical equipment.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["a10f3196831647bc8842eacac640047d", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["color, white, containers", "house, table, chair"], "captions_pred_pc": ["a black and white illustration of the letter 'f' made up of tiny dots", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of 
a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room", "a 3d rendering of a small white table with a chair"], "question": "which entity has a table and chair?", "label": 1}, {"captions": [" a wooden frame featuring a curved, colorful screen.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["e6ff8537a6ba409296954984071218a3", "bded33af34104b9686b845dfd18309a9"], "properties": ["frame, screen, color", "table, staircase, light"], "captions_pred_pc": ["of a black and white cross on a white background", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d rendering of a black screen with a circular pattern", "a 3d model of a small table with a staircase"], "question": "which object has a staircase?", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["yellow, table, roof", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white drawing of a floor plan", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": ["red and yellow toy robot fireman with a helmet, wheels, and holding a fire extinguisher.", " a small village featuring houses, trees, and a winding road."], "sample_ids": ["e01a1919e9404ca7a4dd52c593649b62", "7acf46c0265d4e39b97ac084852abde8"], "properties": ["color, wheel, helmet", "houses, trees, 
road"], "captions_pred_pc": ["for a black and white image of a pair of earrings", "in 15 words or less a black and white photo of a mountain landscape"], "captions_pred_image": ["a 3d model of a robot wearing a helmet and carrying a large object", "a black and white photograph of a small town"], "question": "which entity has a road?", "label": 1}, {"captions": ["a small 3d-printed plastic model of a lighthouse.", " a destroyed car with rusted, broken metal and torn paper."], "sample_ids": ["9910376529aa4724af8cc35eb2e51146", "3fe31c3bf5cd4574a8ca02222411a988"], "properties": ["size, material, lighthouse", "metal, rusted, paper"], "captions_pred_pc": ["a black and white photo of a water droplet", "a black and white drawing of a person sitting in a chair"], "captions_pred_image": ["a black and white photograph of a lighthouse model", "a black and white image of a piece of debris on the ground"], "question": "which entity is made of metal", "label": 1}, {"captions": [" a diverse town featuring houses, buildings, people, animals, and desert elements.", "white of a rhino head with horns."], "sample_ids": ["436d6492fa06466680ecc82e5e07a7a0", "8481aade84de47cab1a9accf8067e678"], "properties": ["house, building, people", "image, rhino, head"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench", "of a penguin skull in black and white"], "captions_pred_image": ["a 3d model of a small town in the middle of a field", "rhino head 3d model royalty free 3d model preview no 3"], "question": "which image shows a rhino head?", "label": 1}, {"captions": [" of a white tiled floor with a ceiling light and small holes.", " of two rocks with ice elements."], "sample_ids": ["9906caefe141465990aacb312e1025f0", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["light, floor, ceiling", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white polka dot pattern on a white background", "a black and white image of two rocks on a white 
background"], "captions_pred_image": ["a 3d model of a white tile floor", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a small bedroom with wooden floors, walls, roof, and shelf.", " a multi-level building with yellow stairs and columns."], "sample_ids": ["e602ac60041f4b4f84c044161e478781", "c1536f68727947ff9e7810799fac583a"], "properties": ["floor, wall, roof", "level, stairs, columns"], "captions_pred_pc": ["above a black and white image of a decorative metal bar", "in 15 words or less a black and white illustration of dots on a white background"], "captions_pred_image": ["a 3d model of a room with wooden walls and a rug on the floor", "a 3d model of an apartment building"], "question": "which entity has more levels", "label": 1}, {"captions": [" a white arrowhead-shaped rock.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["d0d2a6bc3c3440028789eba8f9894d8e", "97e000ff41094665afd94ea565da8b13"], "properties": ["shape, color, material", "roof, material, wood"], "captions_pred_pc": ["of an arrowhead on a white background stock illustration illustration of an arrowhead on a white background royalty free illustration illustration of an arrowhead on a white background vector illustration of an arrowhead on a white background royalty free illustration illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white 
background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a snowflake on a white background", "a 3d model of the roof of a building"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a yellow triangular kite, resembling a bird with long legs, flying like a glider or airplane.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["22228e69a1704e58af5c9dd184843508", "a17477b445b3443189dad22f768b888b"], "properties": ["yellow, shape, wingspan", "roof, pillar, stairs"], "captions_pred_pc": ["above a 3d rendering of a human body on a white background", "a black and white image of a square with dots"], "captions_pred_image": ["a white kite flying in the sky on a gray background", "a 3d model of a small building with a balcony"], "question": "which object has a roof", "label": 1}, {"captions": ["a featuring a train, large ship, long metal pipe, boat, black map of kenya, and a black piece of plastic.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": ["49c5a9d42ba64fb2b681ef583d700b98", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["a train, a ship, a boat", "hat, candy, strawberry"], "captions_pred_pc": ["above a black and white image of a long, curved line on a white background", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d model of a submarine", "a black and white image of a person wearing a hat"], "question": "which entity has a strawberry?", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and 
lamp.", " a house with a blue roof."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "8ff693cd3ca74f8a901ca259b8b3a7ac"], "properties": ["bed, desk, window", "roof, color, blue"], "captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white drawing of a cross on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d model of a house with a roof"], "question": "which entity has a roof that is blue", "label": 1}, {"captions": ["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["db19e46828c94b1a8b9a6ca9f673c604", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["a, roof, soup", "torso, breasts, pattern"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d model of a bowl and chopsticks on a sheet of paper", "a 3d model of a woman's chest"], "question": "which entity has a shirt with a hexagonal pattern?", "label": 1}, {"captions": [" of a white building with a small house and a desk with a laptop.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["9244a2d3a9e94c8398ef991f1661bb58", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["a, desk, laptop", "roof, trusses, beams"], "captions_pred_pc": ["a black and white image of a piece of furniture", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d model of an office desk on a white background", "a 3d model of a roof structure"], "question": "which entity has a roof structure with trusses and beams", "label": 1}, {"captions": [" of a wooden cabinet, shelf, and small house in a furnished room.", " 
of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["854757ca755240f8b04576d899349151", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["cabinet, room, shelf", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white image of a toothbrush in the shape of the letter 'l' 3d illustration on a white background royalty free illustration", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a living room with a couch, coffee table, and television", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" a building featuring graffiti, chinese writing, a door, a broken window, and an interior bathroom with a sink.", " a two-story small apartment building with a roof."], "sample_ids": ["1ee3df6f94ea4c329a9c5245634e34d5", "8d1102e923954604ae7045a7ca14c1f6"], "properties": ["graffiti, chinese writing, door", "two-story, roof, building"], "captions_pred_pc": ["a black and white illustration of a bridge with dots", "a black and white pattern of dots in the shape of the letter c on a white background vector illustration of a black and white pattern of dots in the shape of the letter c on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a bathroom with a sink and a toilet", "a 3d model of an apartment building royalty free 3d model preview no 2"], "question": "which building has a roof", "label": 1}, {"captions": [" a small blue and red blimp.", "a silver ring with a swirly design and a white 3d printed sphere."], "sample_ids": ["d9083e64d229434094dd91159de9bfd1", "8d81b384b5cc4f46a1779d0a2f5f7e27"], "properties": ["color, shape, size", "color, silver, white"], "captions_pred_pc": ["of a black circular object on a white background", "a black and white illustration of a circle with dots"], "captions_pred_image": ["a 3d model of a zeppelin 
airship royalty free 3d model preview no.2", "a 3d model of a silver ball on a gray background"], "question": "which object is white?", "label": 1}, {"captions": [" a small white house with a staircase and a window.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["9eb88d17310d42dda9e17883e9922525", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["house, staircase, window", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d rendering of a small room with a staircase", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a cube-shaped structure with a hole, featuring a bathroom scene with a person and a refrigerator.", " a house with a green, wooden-structured roof."], "sample_ids": ["59fbbe87e3714115b3daca3159e7ffac", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["shape, bathroom, refrigerator", "roof, color, green"], "captions_pred_pc": ["a black and white illustration of a person standing in front of a toilet", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a black and white photograph of a piece of furniture in a room", "a 3d model of a house with a triangular roof"], "question": "which structure has a green roof", "label": 1}, {"captions": ["a 3d object featuring a flat roof with blue and white pattern, a purple and white bench, a blue and purple striped runner, a bed with blue and purple stripes, blue and white striped paper, a bench with a blue and purple pattern, and a ceiling light fixture with wooden slats.", " a small white house with 
stairs and a spiral staircase, featuring a white table and ceiling light."], "sample_ids": ["28d0cf71ed684dcb87b2c9b2744c3633", "e9e1cc7fae22458197a61f43a9c355f4"], "properties": ["runner, bed, bench", "house, staircase, table"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a creative black and white drawing of a snowflake on a white background royalty free illustration", "above a black and white photograph of a dog in a frame"], "captions_pred_image": ["a 3d model of a long, curved structure", "a 3d model of a small house with a spiral staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a house with a roof, roof truss, and suspended ceiling structure.", " a white and blue building with a black roof."], "sample_ids": ["5abf69f79b92484fb54d41ff0c0a2c11", "c893118316ee43e18322e5964b2806c5"], "properties": ["roof, truss, suspended ceiling", "color, white, blue, roof, black"], "captions_pred_pc": ["a suitcase made of dots on a white background a suitcase made of dots on a white background royalty free illustration", "a black and white illustration of a person standing on top of a hill made up of tiny dots"], "captions_pred_image": ["a 3d model of a house with roof trusses", "a 3d model of a white building on a gray background royalty free 3d model no."], "question": "which building has a black roof", "label": 1}, {"captions": [" of an ipad stand with a small speaker and blue-white light on a cylindrical base.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["e0694fef8e414d69a6a89cdffd212c86", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["base, speaker, light", "house, table, chair"], "captions_pred_pc": ["above a black and white illustration of a clock", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of a tablet stand on a pedestal royalty-free 3d model", "a 3d rendering 
of a small white table with a chair"], "question": "which entity has a table and chair?", "label": 1}, {"captions": [" a small white house with windows and a black lid.", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken window."], "sample_ids": ["4b40af369c1149949f5ccb68becd8430", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["white, windows, lid", "room, furniture, window"], "captions_pred_pc": ["above a black and white image of dots on a white background", "above a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a white house with three windows", "a 3d image of a room with a person in it"], "question": "which entity has a window", "label": 1}, {"captions": ["a featuring a fireplace, wooden bench, sled, log, castle, and cat.", " a house with a blue roof."], "sample_ids": ["fe4cd6c3972940a5b7854ca1ebc8a5a3", "8ff693cd3ca74f8a901ca259b8b3a7ac"], "properties": ["fireplace, bench, log", "roof, color, blue"], "captions_pred_pc": ["a black and white illustration of a snowflake", "a black and white drawing of a cross on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a fireplace with snow on the ground", "a 3d model of a house with a roof"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a small, snow-covered house."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "0d00d10b90134dbe9ce7b2b3d6669237"], "properties": ["roof, green, lawn", "house, snow, cover"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a piece of broken glass on a white background"], "question": "which house is covered in snow", 
"label": 1}, {"captions": [" a red, blue, and green striped tower building.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["8668f9e9d1a64b86b31f260b8056cd19", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, red, blue, green", "a, material, clay"], "captions_pred_pc": ["a black and white drawing of a butterfly on a white background a black and white drawing of a butterfly on a white background royalty free illustration", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a pair of cylindrical towers with a staircase leading up to the top of one of the towers", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" of an orange shopping bag with handles and a \"t\" logo.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["392dcf37195e43948cfbffe099082108", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["color, logo, handle", "box, handle, gun"], "captions_pred_pc": ["a black and white image of a purse with a chain", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d model of a shopping bag royalty free 3d model no.2", "a 3d rendering of a metal box with a handle"], "question": "which object has a handle", "label": 1}, {"captions": ["a featuring a rock formation with various statues, including a woman, an eagle, and elements like wood and a shell.", " a house with a wooden-framed roof structure."], "sample_ids": ["36d90269173b4d1a84dbd61664593f66", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["a, eagle, wood", "roof, material, wood"], "captions_pred_pc": ["a black and white illustration of a map with dots all over it", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a person sitting on the edge of a cliff", "a 3d model of a building with a roof"], "question": "which entity has a roof made of wood", "label": 
1}, {"captions": [" a black and white box-like object with various interpretations, such as a coffee table, building, book, and ceiling fixture.", "s of a boat, bird, paper airplane, and kite flying in the air."], "sample_ids": ["404d7e2cd8894c31bdda02d2b3196464", "795cfa41d48a4cfc893ff1981318594d"], "properties": ["black, white, coffee table", "s, boat, bird, airplane, kite"], "captions_pred_pc": ["a black and white drawing of a square with dots on it", "above a 3d illustration of a boy standing with his arms outstretched"], "captions_pred_image": ["a black and white 3d model of a building", "a white kite flying in the air against a gray background"], "question": "which entity is a photograph?", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " a snowy city with buildings and a plane flying overhead."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "cc63ceb2b5e84872a1a1f6423de419e2"], "properties": ["bed, desk, window", "building, plane, city"], "captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white photo of an airplane on a white background"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d model of a city in black and white"], "question": "which entity is a city?", "label": 1}, {"captions": [" a white rock with green grass and moss on it.", "a featuring multiple animal heads, including a fox, chihuahua, and cat with ears."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "f37b1515c52248d887b9b415576f5253"], "properties": ["color, grass, moss", "animal, head, fox, chihuahua, cat"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "above a black and white image of a butterfly shaped object"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a 3d model of a fox's head"], "question": "which entity is a painting?", "label": 1}, {"captions": ["an 
orange and white striped rocket model.", "a white 3d printed mickey mouse dice with various numbers and symbols on it."], "sample_ids": ["9f19d5d47d174d3382c7dc31aaf22f0b", "e2645ac544844f3c981203134a99c30c"], "properties": ["color, orange, white", "- material is plastic- shape is dice- color is white"], "captions_pred_pc": ["a black and white drawing of a tree on a white background vector illustration of a black and white drawing of a tree on a white background, isolated on a white background illustration of a black and white drawing of a tree on a white background, isolated on a white background illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a", "a circle of dots with the number 2 in the center"], "captions_pred_image": ["a 3d model of a rocket on a gray background", "a 3d printed white dice with a mickey mouse face"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a small house with a roof and ceiling-mounted air conditioner.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["6965067ea9e34357a7af21a2f078fbac", 
"a17477b445b3443189dad22f768b888b"], "properties": ["roof, air conditioner, house", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white illustration of a window", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d rendering of a small house with a covered porch", "a 3d model of a small building with a balcony"], "question": "which house has a roof", "label": 1}, {"captions": [" a house featuring a pink-purple roof with trusses and wooden ceiling beams.", " a small white building with stairs and a white table."], "sample_ids": ["b6b6a3f82bdd47c3afaf9af885ba8703", "e30374c614f54fdb90f35b96b071349d"], "properties": ["roof, trusses, beams", "building, stairs, table"], "captions_pred_pc": ["a black and white pattern of dots in the shape of a square royalty free illustration", "above a black and white drawing of a cat sitting on top of a letter 'e'"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of a building with a staircase"], "question": "which building has stairs?", "label": 1}, {"captions": [" a wooden object, including a board, piece of wood, box, and shelf.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["c986212445a1466ca7be7b5ac6bea729", "97e000ff41094665afd94ea565da8b13"], "properties": ["wood, board, shelf", "roof, material, wood"], "captions_pred_pc": ["a black and white drawing of snowflakes on a white background", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d rendering of a piece of marble", "a 3d model of the roof of a building"], "question": "which object is made of wood", "label": 1}, {"captions": [" a furnished room featuring a table, chairs, desk, shelves, and a small kitchen and living area.", " of a small white building with stairs and a lid."], "sample_ids": ["9403cf50e8cb44c195b76afd89d0c9fb", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["furniture, room, kitchen", "building, stairs, lid"], 
"captions_pred_pc": ["a black and white image of a room with dots all over it", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a room with a table, chairs, and shelves", "a 3d model of a white box on a gray background"], "question": "which entity has a lid?", "label": 1}, {"captions": [" a small house with a tree, pool, and pond in a green and blue landscape.", " a small house with a staircase, balcony, and wooden floor."], "sample_ids": ["e22489ba182f45cb81f0a83f22abe9bd", "e67e211004cb450cbaf8139dd74ba39b"], "properties": ["house, tree, pool", "floor, staircase, balcony"], "captions_pred_pc": ["in 15 words or less a black and white image of a square with dots around it", "a black and white drawing of a wallet"], "captions_pred_image": ["a 3d model of a house and a tree in a box royalty free 3d model preview no.3", "a 3d model of a bench on a wooden floor"], "question": "which house has a staircase", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " a multicolored metal building structure with a roof."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "22483891fd124baca3bbc6a6a49adc9c"], "properties": ["color, table, ceiling", "color, roof, structure"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "of a black and white photo of a bike on a white background"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d model of a barn structure"], "question": "which structure has a roof", "label": 1}, {"captions": ["a collection featuring a furnished room, destroyed building, us map, house with roof, flying plane, and a ring with paper.", " a snowy city with buildings and a plane flying overhead."], "sample_ids": ["f13d2d1d78cd49e78f3430abbb251edd", "cc63ceb2b5e84872a1a1f6423de419e2"], "properties": ["collection, room, 
destroyed, house, roof, plane, ring, paper", "building, plane, city"], "captions_pred_pc": ["a black and white image of a person's face", "a black and white photo of an airplane on a white background"], "captions_pred_image": ["a black and white photograph of a person sitting on a couch", "a 3d model of a city in black and white"], "question": "which entity has a plane flying over it?", "label": 1}, {"captions": [" of a multi-colored spooling machine with wires and circuit board, featuring a lamp and shelf with toys.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["0e3f5cc16806492b948d41a748819ce3", "bded33af34104b9686b845dfd18309a9"], "properties": ["color, shape, material", "table, staircase, light"], "captions_pred_pc": ["a black and white image of a decorative tile", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d rendering of an electronic device on a white surface", "a 3d model of a small table with a staircase"], "question": "which object is made of wood", "label": 1}, {"captions": [" of a white plastic bar, resembling a shelf or towel rod, with a metal light fixture.", "a victor calculator with a black plastic cover and wall-mounted design."], "sample_ids": ["f84dec547a3d4710b48db11fc9fa489a", "88ffa01f4fc34a8cb3e2a659e9e26125"], "properties": ["light source, fixture, color", "cover, black, plastic"], "captions_pred_pc": ["a black and white photo of a small square on a white background", "of a black and white image of a skateboard"], "captions_pred_image": ["a 3d rendering of a white object on a gray background", "a victor calculator on a white background"], "question": "which object has a black plastic cover", "label": 1}, {"captions": [" a small house with a blue roof.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["fa21afd3a99d448cb23fa527a784769c", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": 
["roof, color, blue", "house, pool, balcony"], "captions_pred_pc": ["a house made of dots on a white background a house made of dots on a white background royalty free illustration", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a house with a porch and balcony royalty-free 3d model preview no.2", "a 3d model of a modern house"], "question": "which house has a pool", "label": 1}, {"captions": ["a 3d rendered coffee table with a black and brown base and a square ceiling light.", " tall grass, plants, rocks, and a tree."], "sample_ids": ["27a365f067004d9c9c58e40c12827ce0", "eefed882ed5f4711bc5a76332d9712f3"], "properties": ["baseColor, black, brown, tableTopColor, brown, black", "grass, plants, rocks"], "captions_pred_pc": ["a black and white image of a patterned square on a white background", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a coffee table", "a 3d model of a group of trees"], "question": "which entity has more grass", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "a17477b445b3443189dad22f768b888b"], "properties": ["color, shape, and size", "roof, pillar, stairs"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a black and white image of a square with dots"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["roof, green, lawn", "island, mountain, grass"], "captions_pred_pc": ["a black and white image of a knife on a white 
background royalty free illustration", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": ["a featuring a red hat, floating cup, bowl filled with candy, and a strawberry.", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken window."], "sample_ids": ["e27a9fd533dc41da9cf2eeb8fee2a5af", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["hat, candy, strawberry", "room, furniture, window"], "captions_pred_pc": ["a black and white illustration of two spheres", "above a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a black and white image of a person wearing a hat", "a 3d image of a room with a person in it"], "question": "which entity has a kitchen?", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a house with roof trusses and wooden beams on a suspended ceiling."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "3c2e3a3670b042069bd8290e2c357702"], "properties": ["roof truss, insulation, suspended ceiling", "roof trusses, beams, suspended ceiling"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "above a black and white drawing of a building"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a house with a roof in progress royalty free 3d model preview no. 
1"], "question": "which entity has a suspended ceiling", "label": 1}, {"captions": [" a small pink stone pillar/column.", "a black and white of a knife/sword with a handle."], "sample_ids": ["bef329bb8d9f467cb86b258030dbf9ff", "c7692fa635e049bda0a2039fa5a784a4"], "properties": ["size, material, color", "image, color, black and white"], "captions_pred_pc": ["a black and white image of a small square with black dots", "of a black and white knife on a white background"], "captions_pred_image": ["a 3d model of a stone column on a white background", "a black and white image of a knife"], "question": "which entity is not a black and white image?", "label": 1}, {"captions": [" a green electrical utility box with a warning sign and a small blue box on a concrete base.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["8e39d4766ed4444ea527d6c5ea33a5ef", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, base, warning", "a, material, clay"], "captions_pred_pc": ["a black and white illustration of a box with dots on it", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of an electrical box royalty-free 3d model preview no.2", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": ["a featuring a sailboat with a hook, a bird suspended in the air, and a pair of scissors.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["f57ae66555d34349aeadc38b33f8f267", "bf18bfd89efd43389781050230467d58"], "properties": ["a, bird, hook", "Lights, number, five"], "captions_pred_pc": ["of a 3d scan of a person's torso and limbs", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a black and white photo of a kite flying in the sky", "a white chandelier with five white shades"], "question": "which entity has more lights", "label": 1}, {"captions": [" 
a house with a green roof and lawn.", " a small white house with stairs and a wall-mounted shelf."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "10c4ba5b0db4490db9c00c21c94cb41f"], "properties": ["roof, green, lawn", "house, color, white"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "above a black and white drawing of a bench"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of a small white building"], "question": "which house is white", "label": 1}, {"captions": [" a cityscape featuring various buildings, trees, an airport runway, and a plane.", " of a tree stump and rock with flowers on them."], "sample_ids": ["9b45036d3f0342a78db9a25938dc77fc", "3f74af45aeeb43ee95e2c8a5e3afeae6"], "properties": ["building, tree, plane", "flower, rock, tree stump"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake in the shape of a snowflake royalty free illustration", "above a black and white drawing of a flower on a white background"], "captions_pred_image": ["a black and white image of an industrial area with buildings and trucks in the foreground", "a 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree st"], 
"question": "which entity has more flowers", "label": 1}, {"captions": [" a small white bookcase-like building with stairs and a light fixture.", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], "sample_ids": ["5f99eb9d1f1e4d57b5690446f832c841", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["building, color, white", "building, plane, room"], "captions_pred_pc": ["in 15 words or less a black and white image of the letter 'f' made up of dots", "a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a bookshelf on a white background", "a 3d model of a box with a lot of items inside"], "question": "which building has a room?", "label": 1}, {"captions": [" of a white spiral, earbuds, ceiling light, and silver ring with black and white scissors and design.", "three white plastic containers with lids, including a box, a cylinder, and a bottle."], "sample_ids": ["c69f60b389124ad9b4f81c64ec332054", "67e8933750254cd8afddbf4865ae9e39"], "properties": ["earbuds, light, ring", "box, cylinder, bottle"], "captions_pred_pc": ["a black and white drawing of a needle and thread", "a black and white dots pattern on a white background"], "captions_pred_image": ["a black and white illustration of a pair of sunglasses and a pair of scissors next to each other on a white background", "a 3d model of a plastic bottle, a plastic container, and a plastic lid"], "question": "which entity has a box", "label": 1}, {"captions": [" a house featuring a detailed roof structure and a suspended ceiling with a map on it.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["ee7e6031912b46bc8ca7205a959c5c16", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["roof, structure, suspended", "house, pool, balcony"], 
"captions_pred_pc": ["a black and white image of a piece of lace", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a house with a metal roof", "a 3d model of a modern house"], "question": "which house has a pool", "label": 1}, {"captions": ["a featuring a flying object, hand holding a rock, floating paper, cityscape, piece of wood, airborne fish, and street with buildings.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["e04ad505d0c14dcbb593c49be7d04546", "97e000ff41094665afd94ea565da8b13"], "properties": ["a, hand, holding, rock, paper, floating, fish, street, buildings", "roof, material, wood"], "captions_pred_pc": ["a black and white illustration of a rock on a white background", "a black and white drawing of a floor plan"], "captions_pred_image": ["a black and white image of a rock in the air", "a 3d model of the roof of a building"], "question": "which roof is made of wood", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " of a lidded trash can."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "b166e9a1c5d540c19a1075b7f064f42f"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "lidded, trash can, color black"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "a circle of dots on a white background a circle of dots on a white background vector illustration of a circle of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 
3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d", "a 3d model of a trash can"], "question": "which trash can is black", "label": 1}, {"captions": ["an orange and white striped rocket model.", " a house with a roof and beams."], "sample_ids": ["9f19d5d47d174d3382c7dc31aaf22f0b", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["color, orange, white", "roof, beams, house"], "captions_pred_pc": ["a black and white drawing of a tree on a white background vector illustration of a black and white drawing of a tree on a white background, isolated on a white background illustration of a black and white drawing of a tree on a white background, isolated on a white background illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a rocket on a gray background", "a 3d model of a 
building with a roof"], "question": "which object has a roof and beams", "label": 1}, {"captions": ["a featuring a teddy bear on a rainbow, accompanied by a baby, a cat, a dog, and a drink, with a pink and green sign displaying \"close the sky over dreams.\"", " a wooden roof structure with a pink roof."], "sample_ids": ["80dfbe37b3d74f11b712ca1ad6570f70", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["image, color, pink", "roof, color, pink"], "captions_pred_pc": ["above a black and white photograph of a dog in a bowl", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d sculpture of an animal on a piece of paper", "a 3d model of the roof of a building"], "question": "which entity has a roof that is pink?", "label": 1}, {"captions": [" of a pillow featuring a hot dog shape and a bag of chips with a dragon design.", " a small, rusty toy character with a hat, dart, dartboard, and leaves, standing on a rock."], "sample_ids": ["4ae6ac813d584d12a5d5d608a595bfe5", "a24872444e5c468a9264d503a9ca7276"], "properties": ["shape is hot dog, design is dragon, color is black", "rusty, hat, dartboard"], "captions_pred_pc": ["a black and white illustration of an eye with dots", "in 15 words or less an illustration of a 3d character on a white background stock illustration"], "captions_pred_image": ["a black and white photo of a pillow with a soda can on it", "a 3d sculpture of a teddy bear standing on a pedestal"], "question": "which entity is a toy?", "label": 1}, {"captions": [" a white table with grey legs, a white top, writing on it, and three legs.", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["68e0d097351843a3980421f2ae624c59", "09f2cf267e954c958828325067bcc36a"], "properties": ["white, top, writing", "island, terrain, rocks"], "captions_pred_pc": ["a group of black dots on a white background stock illustration a group of black dots on a white background royalty free illustration", "above a black and 
white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a table with writing on it and a mouse on top of it", "a black and white image of a piece of dirt on the ground"], "question": "which entity has more rocks", "label": 1}, {"captions": ["s of a rock, boat, plane, and leaf on a stick.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["be0884a7ced34b3d92687b6087798a1e", "bded33af34104b9686b845dfd18309a9"], "properties": ["s, stick, leaf", "table, staircase, light"], "captions_pred_pc": ["above a black and white drawing of an object floating in the sky", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a black and white photograph of a rock on a sandy surface", "a 3d model of a small table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a modern house with a pool, balcony, and a small white box with a hole in it.", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["795cebc8a9bd4780aa44c8dbccfd6d1a", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["house, pool, balcony", "box, paper clip, lock"], "captions_pred_pc": ["above a black and white image of a room with a lot of dots", "above a black and white image of a clock in the shape of a spiral"], "captions_pred_image": ["a 3d model of a modern house", "a 3d model of a stapler with a staple in it royalty free 3d model no."], "question": "which box has a paper clip and a lock?", "label": 1}, {"captions": [" a wooden roof structure with a pink roof.", " a house with a roof, wooden beams, and chimney."], "sample_ids": ["b70565bda31d42958d3597bf6067ddd2", "be1376023c274bdda995d54f3694157f"], "properties": ["roof, color, pink", "roof, beams, chimney"], "captions_pred_pc": ["above a black and white image of a metal grate", "a black and white drawing of a bathroom with a shower"], "captions_pred_image": ["a 3d model 
of the roof of a building", "a 3d model of a house with a roof"], "question": "which structure has a chimney", "label": 1}, {"captions": [" a futuristic black and gold spaceship with a gun on it.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["6c34eb48b0c44667864a2af3fed92d6c", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["color, shape, gun", "roof, color, yellow"], "captions_pred_pc": ["above a black and white photograph of a fighter jet", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a futuristic space fighter", "a 3d model of a house with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" a torn, open egyptian book with a broken cover, featuring a box with an open lid and a picture of a woman on it.", " a house with a yard, trees, bushes, and surrounding buildings."], "sample_ids": ["fb0b2d4f3d3548c792d8d72260429435", "7f8942ef51dd4246993a587a12df168c"], "properties": ["cover, torn, open", "house, yard, surrounding buildings"], "captions_pred_pc": ["a 3d illustration of a sponge on a white background 3d illustration of a sponge on a white background royalty free illustration", "a black and white image of a truck on a white background"], "captions_pred_image": ["a black and white image of a torn bank note", "a 3d model of a house in the middle of a field"], "question": "which entity has a yard", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["room, bed, desk", "house, roof, wooden"], "captions_pred_pc": ["a black and white drawing of 
a door", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a black and white photograph of a birdhouse"], "question": "which house has a roof", "label": 1}, {"captions": ["s of a cat, fish, person with a hat, pig with a green hat and swimsuit, and a green and pink bird.", " of a wrecked plane, ship, and bird on a pile of rocks with grass."], "sample_ids": ["402601779d1d4146b4cde106dfff1b27", "b0c703df20154bbf9fd8707c61137fc5"], "properties": ["s, cat, fish, person, pig, bird", "plane, ship, bird"], "captions_pred_pc": ["above a black and white photo of a toy octopus on a white background", "a black and white watercolor map of the state of ohio"], "captions_pred_image": ["a snowflake in the air on a cloudy day", "a black and white photograph of a pile of debris on the ground"], "question": "which entity has a plane", "label": 1}, {"captions": ["three white plastic containers with lids, including a box, a cylinder, and a bottle.", " a wooden staircase with a railing and table."], "sample_ids": ["67e8933750254cd8afddbf4865ae9e39", "956247bea850458199c651037d4b1d7f"], "properties": ["box, cylinder, bottle", "railing, table, staircase"], "captions_pred_pc": ["a black and white dots pattern on a white background", "above a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic container, and a plastic lid", "a 3d model of a table with a staircase"], "question": "which object has a table?", "label": 1}, {"captions": ["a 3d white cube with windows resembling a building.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["4a07a5293f024bb0a353954a056ef626", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["- material is white- color is white- texture is textured", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black 
and white image of a square made up of small dots", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d model of a cube", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a small house with a tree, pool, and pond in a green and blue landscape.", "a featuring a room with a staircase, a damaged bus and rv, two houses, a large white box, and destroyed buildings with debris."], "sample_ids": ["e22489ba182f45cb81f0a83f22abe9bd", "a47dcf3d3cf34c58af17b6715d6f1232"], "properties": ["house, tree, pool", "room, staircase, bus"], "captions_pred_pc": ["in 15 words or less a black and white image of a square with dots around it", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a house and a tree in a box royalty free 3d model preview no.3", "a 3d image of a building with a lot of debris"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a house featuring a purple roof and a suspended ceiling with a light fixture.", " a large house with a roof on a platform."], "sample_ids": ["579b43057ef74bd08d06bdd3e8d973a0", "cb3e09a301b746918a682a595037c7f7"], "properties": ["roof, purple, suspended", "roof, platform, house"], "captions_pred_pc": ["a black and white image of a piece of paper with a pattern of dots on it", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of the roof of a building royalty free 3d model no.", "a 3d model of a small house"], "question": "which house has a roof on a platform", "label": 1}, {"captions": [" a small black box on a purple square.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["75f209e543c046669099190953616acc", "06a1c233fb444830b577aa06e2c01294"], "properties": ["color, shape, size", "house, tree, hill"], "captions_pred_pc": ["a black and white image of the 
letter l", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of a small black box sitting on top of a gray surface", "a black and white image of a house in the middle of a field"], "question": "which entity is a house?", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["resembles, octopus, squid, spider, robot", "box, handle, gun"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a 3d rendering of a metal box with a handle"], "question": "which entity has a handle?", "label": 1}, {"captions": [" a long row of steel shelves in a warehouse, featuring a suspended scaffolding system.", " of a white and brown sea shell with a hole and small pearls on it, resembling a sea urchin."], "sample_ids": ["578fe7a7bd754b889be33aea99cf5050", "411c164757fc4de68dfecb35fa858223"], "properties": ["a, material, steel", "resembles, sea urchin, shell"], "captions_pred_pc": ["above a black and white image of a rack with multiple shelves", "in 15 words or less a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a 
dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a"], "captions_pred_image": ["a 3d model of a large metal structure", "a 3d model of a sea urchin"], "question": "which entity is not a shell?", "label": 0}, {"captions": [" a yellow and white structure with yellow poles.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["bada91e216fd486d9e3e356d48978f33", "4a889132cc444d10bfcbf6c760984416"], "properties": ["color, shape, poles", "a, color, white"], "captions_pred_pc": ["a black and white drawing of a dotted pattern on a white background a black and white drawing of a dotted pattern on a white background royalty free illustration", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d model of a desk and chair"], "question": "which entity is not a building?", "label": 0}, {"captions": ["a featuring a pile of food, leaves, shredded paper, and rocks with scattered broken paper pieces.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["5206d4d96c2d428b9c1f7ee0e13bcffb", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["food, leaves, shredded paper, rocks", "island, mountain, grass"], "captions_pred_pc": ["a black and white image of a bird in flight", "a black and white map of the island of 
malta"], "captions_pred_image": ["a 3d model of a mountain range on a white surface", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": [" a white and black chair with a black handle and armrest.", "a white teapot with pink flowers and a handle."], "sample_ids": ["cf1f435c54b046f68d6603cd3369a94f", "f6c5e8931d164979a71914127c7e5438"], "properties": ["color, black, white, handle, armrest", "color, white, handle"], "captions_pred_pc": ["a black and white drawing of a geometric shape", "a black and white dots on a white background"], "captions_pred_image": ["a 3d rendering of a white chair with a black handle", "a white tea kettle with a black handle and floral design"], "question": "which object has a handle", "label": 1}, {"captions": [" a vibrant city skyline featuring various colored buildings, trees, and skyscrapers.", " a small bedroom with wooden floors, walls, roof, and shelf."], "sample_ids": ["1a1fb9b0d83845f6b1238fb45e0defff", "e602ac60041f4b4f84c044161e478781"], "properties": ["color, skyline, buildings", "floor, wall, roof"], "captions_pred_pc": ["a black and white illustration of a city skyline", "above a black and white image of a decorative metal bar"], "captions_pred_image": ["a black and white 3d model of a city skyline", "a 3d model of a room with wooden walls and a rug on the floor"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" of an orange shopping bag with handles and a \"t\" logo.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["392dcf37195e43948cfbffe099082108", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["color, logo, handle", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white image of a purse with a chain", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d model of a shopping bag 
royalty free 3d model no.2", "a 3d model of a woman's chest"], "question": "which entity has a pattern", "label": 1}, {"captions": [" of a house with a roof and a room featuring a pink ceiling.", " a small white house with stairs and a spiral staircase, featuring a white table and ceiling light."], "sample_ids": ["9c9f4e7f7c9442df99a9dc41870083c5", "e9e1cc7fae22458197a61f43a9c355f4"], "properties": ["roof, room, ceiling", "house, staircase, table"], "captions_pred_pc": ["a black and white drawing of the letter 'l' on a white background", "above a black and white photograph of a dog in a frame"], "captions_pred_image": ["a 3d model of a building with a roof and walls", "a 3d model of a small house with a spiral staircase"], "question": "which house has a staircase?", "label": 1}, {"captions": [" of a wooden cabinet with drawers and filing features.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["abbc90bbd5474f73b16482ccd10e07ec", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["Cabinet, Drawers, Filing", "island, terrain, water"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d rendering of a white wooden chest of drawers", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": ["a featuring a plane, a small plane, a bird, and a dragonfly all flying in the air.", "a featuring a mossy rock, a piece of wood, a rocky island, a plane, a tree branch, a gray rock, a rocky mountain, and a moss-covered tree branch."], "sample_ids": ["f39783d05dec49e49482c407d656e0f7", "09e5288a9e98421985ee6e0042b3c325"], "properties": ["a, plane, small plane", "mossy, rock, rocky"], "captions_pred_pc": ["of a black and white photo of an airplane propeller", "a black and white illustration of a small island in the middle of a body of water"], 
"captions_pred_image": ["a 3d printed model of a fighter plane in the air", "a 3d model of the comet 67p/churyumov-gerasimenko"], "question": "which object is not a plane?", "label": 0}, {"captions": [" a two-story building with stairs and a three-tier display.", "3d white playground set with slides and swings, featuring a plane, building, and dragon in the sky."], "sample_ids": ["1d817b7b6ded439d8b92eeab87e4cf8d", "e694d53545d449319a64cceb0280c3c6"], "properties": ["tiers, building, stairs", "3d, slide, swing"], "captions_pred_pc": ["for a black and white photo of a person sitting on a bench", "for a 3d model of the letter 'j'"], "captions_pred_image": ["a 3d model of a building with two floors and a spiral staircase", "a 3d model of a playground slide"], "question": "which entity has a plane in the sky?", "label": 1}, {"captions": [" a cityscape featuring various buildings, trees, an airport runway, and a plane.", " a building surrounded by various structures, including a skyscraper, in a city setting."], "sample_ids": ["9b45036d3f0342a78db9a25938dc77fc", "3b780ced7a814f86b3ee67f3596dddce"], "properties": ["building, tree, plane", "building, skyscraper, city"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake in the shape of a snowflake royalty free illustration", "a black and white drawing of an airplane"], "captions_pred_image": ["a black and white image of an industrial area with buildings and trucks in the foreground", "a 3d model of a knife with a building on top of it"], "question": "which entity is in a city setting?", "label": 1}, {"captions": [" of a white plastic tube with a hole and a chip on it.", " of a yellow metal locker with legs, wheels, and metal brackets."], "sample_ids": ["9968e06a62e8487ea33460e640abc573", "e3fde8fe782c41f0b141c9f1b8e13aa5"], "properties": ["color is white, material is plastic, shape is tube", "metal, legs, wheels"], "captions_pred_pc": ["a black and white image of a broom on a stand", "a 
black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white illustration of a black and white"], "captions_pred_image": ["a white object on a gray background", "a 3d model of an old metal locker"], "question": "which object is made of metal", "label": 1}, {"captions": ["low poly of a christmas tree", " a house with a yard, trees, bushes, and surrounding buildings."], "sample_ids": ["89bab7f26bc7418d96beb3ac0059f607", "7f8942ef51dd4246993a587a12df168c"], "properties": ["low poly, christmas, tree", "house, yard, surrounding buildings"], "captions_pred_pc": ["above a black and white image of a small, round object", "a black and white image of a truck on a white background"], "captions_pred_image": ["a 3d model of a christmas tree on a white background", "a 3d model of a house in the middle of a field"], "question": "which entity is a house?", "label": 1}, {"captions": ["a 3d 
collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["color, white, black, white", "mountainous, landmass, state"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of a piece of paper"], "question": "which entity is a landmass", "label": 1}, {"captions": [" a house with a blue roof.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["8ff693cd3ca74f8a901ca259b8b3a7ac", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["roof, color, blue", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black and white drawing of a cross on a white background royalty free illustration", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " a small wooden house."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "4cb4dba1237443eb8dc299530fa12521"], "properties": ["color, table, ceiling", "house, material, wood"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on 
a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d model of a small cottage"], "question": "which house is made of wood", "label": 1}, {"captions": [" of a green alien creature with long legs and a long tail.", " of a toy mushroom character with a white and brown head."], "sample_ids": ["ad7c9475a4e24462bf6b5c24bcde317a", "ae8a73809d4647c09cc82f403e47de1d"], "properties": ["color, leg, tail", "color, head, white and brown"], "captions_pred_pc": ["a black and white image of a sword on a white background", "a black and white illustration of a butterfly sitting on a dandelion stock illustration"], "captions_pred_image": ["a 3d model of an alien creature with a long tail", "a 3d model of a gray and white cartoon character"], "question": "which entity has a white and brown head", "label": 1}, {"captions": [" a destroyed car with rusted, broken metal and torn paper.", " a wooden staircase with a red and white railing and a red arrow."], "sample_ids": 
["3fe31c3bf5cd4574a8ca02222411a988", "bb8bb4c9972d4b718b8bbd3ed5fdd14d"], "properties": ["metal, rusted, paper", "arrow, red, white"], "captions_pred_pc": ["a black and white drawing of a person sitting in a chair", "above a black and white image of a square with a white cross in the center"], "captions_pred_image": ["a black and white image of a piece of debris on the ground", "a 3d model of a spiral staircase"], "question": "which entity has a red arrow", "label": 1}, {"captions": ["a 3d gold-plated cylindrical device with a green screen and an emerald green stone.", "a 3d wooden toy castle with red and yellow towers, a red roof, and a man inside."], "sample_ids": ["adec10493e06436c967d5797f7085225", "311f6655ed854899b07ea10f3613ef7a"], "properties": ["color, material, shape", "a, color, red"], "captions_pred_pc": ["for a black and white image of an object on a white background", "a black and white drawing of a wallet on a white background"], "captions_pred_image": ["a 3d rendering of a car's air intake system", "a 3d model of a castle with two towers"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a 3d object featuring a fish with a long nose and open mouth, a cat head with red, yellow, and green stripes, a frog with a long snout, a white horse with green and yellow stripes, and a wolf mask.", " a gray, metal pillar/cylinder."], "sample_ids": ["276699bb0f974c47b4e2954cfcd1651c", "11391e6bab574dc0be8f2440fbc3b724"], "properties": ["a, color, white", "color is gray, material is metal, shape is cylinder"], "captions_pred_pc": ["a black and white image of a skull in the shape of a butterfly", "of a black candle on a white background"], "captions_pred_image": ["a 3d model of an animal with a long nose", "a 3d model of a candlestick"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", "white of a rhino head with horns."], "sample_ids": 
["8fd3836862a44a8d8b4d224bfc30c2c3", "8481aade84de47cab1a9accf8067e678"], "properties": ["a knife, blade, handle", "image, rhino, head"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "of a penguin skull in black and white"], "captions_pred_image": ["a piece of white plastic on a gray background", "rhino head 3d model royalty free 3d model preview no 3"], "question": "which image is of a rhino head?", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", " of a white rock-like object, possibly a shell or ice."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "096e42b466ec438d95c5d89a85191534"], "properties": ["base material is wood, color is red, message is welcome to northwich", "white, rock, shell"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "in one hundred words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a white rock on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a pink-framed building structure with beams and trusses.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["18e392c5360146eda498c5edab25b15c", "3cd410c4359a4cef98702963a2b9802b"], "properties": 
["frame, beams, trusses", "roof, trusses, ladder"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d model of the roof of a building"], "question": "which entity has a roof with wooden trusses and a ladder?", "label": 1}, {"captions": [" of a wooden nightstand with a hexagonal pattern, white top, and drawer.", " a small house with stairs and a roof."], "sample_ids": ["e6f2dbec6d464b4da4aa47bce242f6e5", "e9305c80010f4e3b9de9789f01a9bee5"], "properties": ["Drawer, Pattern, White", "roof, stairs, house"], "captions_pred_pc": ["a cross made of dots on a white background vector illustration of a cross made of dots on a white background royalty free illustration illustration of a cross made of dots on a white background vector illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a", "above a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d rendering of a wooden box with a handle", "a 3d rendering of a podium on a 
wooden floor"], "question": "which object has a roof", "label": 1}, {"captions": [" of a small wooden house with a blue roof and clock tower.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["6b745457e06840119058883b35f78f58", "a2354f13774340d392fbf33564934aab"], "properties": ["roof, color, blue", "building, roof, yellow"], "captions_pred_pc": ["a black and white image of a building with dots", "a black and white image of a cell phone"], "captions_pred_image": ["a 3d model of a house with a steeple on top", "a 3d rendering of a machine with a conveyor belt"], "question": "which building has a yellow roof", "label": 1}, {"captions": [" of a white cube-shaped shelf", " tall grass, plants, rocks, and a tree."], "sample_ids": ["1fc0969de9bf47cab82cae4cc8fe6c2f", "eefed882ed5f4711bc5a76332d9712f3"], "properties": ["shape is cube, color is white, material is wood", "grass, plants, rocks"], "captions_pred_pc": ["a black and white pattern of dots on a white background a black and white pattern of dots on a white background royalty free illustration", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d rendering of a white shelf on a gray background", "a 3d model of a group of trees"], "question": "which entity is made of grass", "label": 1}, {"captions": ["a small 3d purple teapot and elephant.", "a beaded bag and necklace set featuring the word \"bonno\" and blue, white, and yellow patterns."], "sample_ids": ["4a27592dc8164f709b44446ee11832c0", "15f0a38c3f134f2c801e92ac2ac5c124"], "properties": ["color, shape, material", "color, pattern, word"], "captions_pred_pc": ["a black and white 3d illustration of an elephant's head on a white background 3d illustration of an elephant's head on a white background royalty free illustration", "a black and white photo of a pair of scissors"], "captions_pred_image": ["a white ceramic teapot on a gray background", "a beaded necklace with the word banned 
on it"], "question": "which entity is a bag?", "label": 1}, {"captions": [" a modern orange leather swivel recliner chair and ottoman.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["943ae5d90d654498912d0d3d0114ba35", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["color, orange, leather", "a room, a cake, a table"], "captions_pred_pc": ["a black and white illustration of a chair", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a white leather lounge chair and ottoman", "a 3d rendering of a white room with various items in it"], "question": "which entity has a room?", "label": 1}, {"captions": [" a green electrical utility box with a warning sign and a small blue box on a concrete base.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["8e39d4766ed4444ea527d6c5ea33a5ef", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["color, base, warning", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white illustration of a box with dots on it", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of an electrical box royalty-free 3d model preview no.2", "a white 3d model of a city skyline"], "question": "which image has a laptop?", "label": 1}, {"captions": [" a silver vintage racing car.", " a white and gold mirror on a wooden easel stand."], "sample_ids": ["9d8a1e856251435f9596c031005520bd", "0d10d734448d4a5d8d07b938c12d9d80"], "properties": ["color, silver, vintage", "color, white, gold"], "captions_pred_pc": ["a black and white image of a chair with dots all over it", "for a black and white image of a shoe on a white background"], "captions_pred_image": ["a 3d model of a vintage racing car", "a 3d model of a standing mirror on a white 
background"], "question": "which object is white and gold", "label": 1}, {"captions": [" a small island with trees, grass, water, and rocky terrain.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["d5e708b7549e48e2b02fd6fe9f197ec2", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["island, terrain, water", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white photograph of a piece of paper with dots on it", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d image of a small island in the middle of a lake", "a 3d model of a woman's chest"], "question": "which entity has a teddy bear?", "label": 1}, {"captions": ["a 3d baseball card in a clear plastic case.", " a house with a wooden-framed roof structure."], "sample_ids": ["04438ab47bd9430c97b5aac8f9907e4c", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["a, color, case", "roof, material, wood"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "a black and white drawing of a staircase"], "captions_pred_image": ["a black and white photograph of an airplane poster on a wall", "a 3d model of a building with a roof"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a featuring a tv, chair, desk, laptop, man, couch, computer, and building.", " a small house on a hill in a field."], "sample_ids": ["d4208427217343e6af1b9b4a42a2f730", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["tv, chair, laptop", "house, hill, field"], "captions_pred_pc": ["for 3d illustration of a smartphone with a qr code on the back and a qr code on the front", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d model of a person sitting on a couch", "a black and white image of a small house"], "question": "which entity is outside?", "label": 1}, {"captions": [" a staircase with a railing, table, and chair, featuring a 
square ceiling light.", " featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot."], "sample_ids": ["51a0fba79bce472a8b827b78a110b77f", "34ebe81ae93841ca829efd15aee4d8c1"], "properties": ["stair, table, chair", "moss, mushroom, grass"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "for a black and white illustration of a cloud on a white background"], "captions_pred_image": ["a 3d model of a staircase in a room", "a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor"], "question": "which entity has a moss-covered rock?", "label": 1}, {"captions": ["a 3d pink spiky spherical flower.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["039a3fc74e39450883c46acbe2f57476", "7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["color, shape, texture", "water, boat, rock"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background", "a black and white illustration of a surfboard"], "captions_pred_image": ["a 3d model of a snowflake on a white background", "a 3d image of an animal laying on the ground"], "question": "which entity has a hole in it", "label": 1}, {"captions": [" a small village featuring houses, trees, and a winding 
road.", " a bee and a rubik's cube next to each other."], "sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "83a27b2b104e4f7f8b42c3c8654153db"], "properties": ["houses, trees, road", "bee, color, black and white"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "a black and white pattern of dots on a white background a black and white pattern of dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white photograph of a small town", "a black and white image of a bee next to a cube"], "question": "which entity is black and white", "label": 1}, {"captions": [" of a pile of metal, torn and shredded paper, rocks, and a decayed animal.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["c117f1923cad4ecf9df61b6e3d633374", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["a pile of metal, torn and shredded paper, rocks, and a decayed animal", "roof, structure, greenhouse"], "captions_pred_pc": ["a black and white map of germany on a white background", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a crumpled piece of paper on a white background", "a 3d model of a building with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" featuring a chair, table, and refrigerator.", "a white circular table with a black base and a white table lamp, accompanied by a black ceiling light with a hanging wire."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "ff7a1dec37f1416fbad0f0705486806c"], "properties": ["chair, table, refrigerator", "table lamp, ceiling light, table"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "a black and white pattern of dots on a white background"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "an oval 
table with a mouse on top of it"], "question": "which entity has a table lamp", "label": 1}, {"captions": [" a white arrowhead-shaped rock.", " of a hammer with a long metal handle."], "sample_ids": ["d0d2a6bc3c3440028789eba8f9894d8e", "30f4b6bcbbb44f568cab4fd439d05145"], "properties": ["shape, color, material", "handle, metal, long"], "captions_pred_pc": ["of an arrowhead on a white background stock illustration illustration of an arrowhead on a white background royalty free illustration illustration of an arrowhead on a white background vector illustration of an arrowhead on a white background royalty free illustration illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on", "a black and white toothbrush on a white background"], "captions_pred_image": ["a 3d model of a snowflake on a white background", "a hammer with a wooden handle and metal head"], "question": "which object has a long metal handle", "label": 1}, {"captions": [" a destroyed house and plane amidst a town with buildings.", " a small island featuring a large building, 
trees, and a house on a hill, surrounded by a forest."], "sample_ids": ["0fd3ddca09194b8f94ef731af3b64a08", "d557c62e9be741a6b0f6b204d11a9c6f"], "properties": ["house, plane, town", "house, hill, forest"], "captions_pred_pc": ["above a black and white drawing of a piece of paper", "above a black and white illustration of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d model of a damaged building", "a black and white image of a small island in the middle of a body of water"], "question": "which entity has a house on a hill?", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", "a black and white of a knife/sword with a handle."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "c7692fa635e049bda0a2039fa5a784a4"], "properties": ["color, material, structure", "image, color, black and white"], "captions_pred_pc": ["a black and white drawing of a room with dots", "of a black and white knife on a white background"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a black and white image of a knife"], "question": "which entity is a black and white image?", "label": 1}, {"captions": [" a white and red polka-dotted onion-shaped object.", "a 3d wooden toy castle with red and yellow towers, a red roof, and a man inside."], "sample_ids": ["d89c28f0761e471e9575a6a22c2cc3a0", "311f6655ed854899b07ea10f3613ef7a"], "properties": ["color, shape, pattern", "a, color, red"], "captions_pred_pc": ["a black and white photograph of a radish on a white background", "a black and white drawing of a wallet on a white background"], "captions_pred_image": ["a 3d model of a white and black polka-dotted object on a gray background royalty free 3d model no.3", "a 3d model of a castle with two towers"], "question": "which object is red?", "label": 0}, {"captions": ["an orange and white striped rocket model.", " of a small house 
featuring a flat, brown roof, table with two chairs and a stool, ceiling light, and a window."], "sample_ids": ["9f19d5d47d174d3382c7dc31aaf22f0b", "3a509431d96b43f8a7aebe2846f08b96"], "properties": ["color, orange, white", "roof, brown, flat"], "captions_pred_pc": ["a black and white drawing of a tree on a white background vector illustration of a black and white drawing of a tree on a white background, isolated on a white background illustration of a black and white drawing of a tree on a white background, isolated on a white background illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a", "a black and white drawing of a snowflake on a white background a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a rocket on a gray background", "a 3d rendering of a table and stool"], "question": "which entity has a brown roof", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", " a small wooden house with a green roof."], "sample_ids": 
["dd3a9323ed514ccab330973ff9588015", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["room, bed, desk", "roof, color, green"], "captions_pred_pc": ["a black and white drawing of a door", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a 3d model of a house with a ladder"], "question": "which house has a green roof", "label": 1}, {"captions": ["three white paper windmills and a city model with a nativity scene silhouette.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["fa5ee6165f31465d9d75d046818f4006", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["windmills, silhouette, city model", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white photo of a pair of sunglasses", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a cityscape on a white background", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond.", "a featuring a bench with flowers, a bridge, a white sculpture, a bicycle with a flower, a giraffe, and a palm tree."], "sample_ids": ["a452d5381dad4dc09f5ebe10635ae5fe", "12093c89a60941e7884b252bdc05104c"], "properties": ["house, roof, green", "giraffe, bench, flower"], "captions_pred_pc": ["above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white 
image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white", "a black and white drawing of a gear on a white background"], "captions_pred_image": ["a 3d model of a building with a black roof", "a 3d model of a sculpture made of sticks"], "question": "which entity has a bench with flowers?", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a red and white robot."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "75e058152d8e438a9c6f8abf112e8d89"], "properties": ["color, shape, and size", "color, red, white"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a black and white drawing of a monkey wearing a hat"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a robot standing in a neutral pose royalty free 3d model preview no 3"], "question": "which entity is red and white?", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", "a featuring a red hat, floating cup, bowl filled with candy, and a strawberry."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "e27a9fd533dc41da9cf2eeb8fee2a5af"], "properties": ["color, white, black, white", "hat, candy, strawberry"], 
"captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a black and white image of a person wearing a hat"], "question": "which entity has more candy", "label": 1}, {"captions": [" a city featuring various buildings, including one with a white roof and numerous white cubes, as well as a plane.", " a spider-like creature with long arms and legs."], "sample_ids": ["a3c50635c2a04e548e57d4f027899131", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["building, roof, white, cubes", "arachnid, leg, arm"], "captions_pred_pc": ["above a black and white image of a map", "a black and white illustration of a spider on a white background"], "captions_pred_image": ["a 3d model of a city on a white background", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": [" a small house with a roof.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["0d2246e433ce4066b76489f17ba8d694", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["roof, house, small", "island, mountain, grass"], "captions_pred_pc": ["a black and white square made up of small dots on a white background", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d model of a house with a triangular roof", "a 3d image of a small island in the middle of a lake"], "question": "which entity has grass", "label": 1}, {"captions": ["royalty-free of a beaver on a skateboard.", "royalty-free of a beaver on a skateboard."], "sample_ids": ["9b6a637ead444bd28452178c74697653", "9b6a637ead444bd28452178c74697653"], "properties": ["image is royalty-free, beaver, skateboard", "image is royalty-free, beaver, skateboard"], "captions_pred_pc": ["for a black and white image of a person's hand holding a toothbrush", "for a black and white image 
of a person's hand holding a toothbrush"], "captions_pred_image": ["a beaver on a skateboard royalty free 3d model preview no. 1", "a beaver on a skateboard royalty free 3d model preview no. 1"], "question": "which image shows a beaver on a skateboard?", "label": 0}, {"captions": [" of a green-leafed plant in a white hanging pot.", "a black motorcycle helmet with a face mask and visor."], "sample_ids": ["bc75bd88ee014f3cb733e4af32c54cc6", "ad6df43a2ce24edfb15f5bb64755ed0d"], "properties": ["color, green, leaf", "color, black, visor"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "a black and white circular shape made up of many small dots on a white background a black and white circular shape made up of many small dots on a white background illustration"], "captions_pred_image": ["a 3d model of a potted plant on a white background royalty free 3d model preview no.3", "a black motorcycle helmet with a visor on top"], "question": "which entity has a visor", "label": 1}, {"captions": [" of a white sheet of paper or box on a gray background.", " a small building with a school, house, soccer field, and white ceiling light, featuring a white and green paper clip."], "sample_ids": ["7b0c1e02d9b14f2fae4f1f7040661cc7", "97f487941d26472294e005fa97c403be"], "properties": ["color, white, background, gray", "color, white, green"], "captions_pred_pc": ["above a black and white photograph of an object", "for a black and white drawing of a computer keyboard"], "captions_pred_image": ["a 3d model of a paper airplane", "a 3d model of an office building"], "question": "which entity has a white and green paper clip?", "label": 1}, {"captions": ["a featuring a sailboat with a hook, a bird suspended in the air, and a pair of scissors.", " featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot."], "sample_ids": ["f57ae66555d34349aeadc38b33f8f267", "34ebe81ae93841ca829efd15aee4d8c1"], "properties": ["a, bird, hook", "moss, mushroom, 
grass"], "captions_pred_pc": ["of a 3d scan of a person's torso and limbs", "for a black and white illustration of a cloud on a white background"], "captions_pred_image": ["a black and white photo of a kite flying in the sky", "a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor"], "question": "which entity has a mushroom in grass?", "label": 1}, {"captions": [" a wooden bench with a metal frame.", " a black triangular metal object with a clock and cross on it, and a small machine on top."], "sample_ids": ["c27b018330da406680e58b94266c310b", "b198a81dc41c4fde8be3ca51c3b0e676"], "properties": ["frame, material, wood", "metal, cross, clock"], "captions_pred_pc": ["a black line on a white background", "above a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a bench", "a 3d model of a piece of furniture"], "question": "which object has a clock on it", "label": 1}, {"captions": [" a small house with a blue roof, a door, and a pool.", " a large steel building with many columns and a pool."], "sample_ids": ["40c52c2d278345c5b4e8d00a991271dc", "2ce649a4152a45bab60d8cafa1dcdeb3"], "properties": ["door, roof, pool", "building material, pool, steel"], "captions_pred_pc": ["of a black and white photo of a 
window with fringes", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a concrete structure"], "question": "which building has a pool", "label": 1}, {"captions": [" a small white building with stairs and shelves.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["9e1f64d4fd514059be934077717536dc", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["building, stairs, shelves", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a white 3d model of a building with stairs", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": [" a modern white house with furniture and a black accent.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["e5025a1ca0034b1aa97a0d42edeeae0f", "97e000ff41094665afd94ea565da8b13"], "properties": ["color, white, furniture, black", "roof, material, wood"], "captions_pred_pc": ["above a black and white drawing of a floor plan", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a white building with black and white tiles", "a 3d model of the roof of a building"], "question": "which house has a wooden roof", "label": 1}, {"captions": [" a white arrowhead-shaped rock.", "a 3d wooden toy on a wooden plate with a piece of fruit and bread with a knife."], "sample_ids": ["d0d2a6bc3c3440028789eba8f9894d8e", "1c389c8f46b345838e515b9747c1f982"], "properties": ["shape, color, material", "plate, fruit, knife"], "captions_pred_pc": ["of an arrowhead on a white background stock illustration illustration of an arrowhead on a white background royalty free illustration illustration of an arrowhead on a white background vector illustration of an 
arrowhead on a white background royalty free illustration illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on a white background illustration of an arrowhead on", "a black and white illustration of a hedgehog"], "captions_pred_image": ["a 3d model of a snowflake on a white background", "a person riding a skateboard on a wooden surface"], "question": "which object is made of wood", "label": 1}, {"captions": [" a green skull and sphere.", " a wooden staircase with a red and white railing and a red arrow."], "sample_ids": ["4f4dc1300ab24b0a910da77a4d5e783f", "bb8bb4c9972d4b718b8bbd3ed5fdd14d"], "properties": ["color, skull, sphere", "arrow, red, white"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background dandelion illustration on a white background illustration", "above a black and white image of a square with a white cross in the center"], "captions_pred_image": ["a 3d model of a skull on a gray background", "a 3d model of a spiral staircase"], "question": "which entity has a red arrow?", "label": 1}, {"captions": ["a featuring a train, large ship, 
long metal pipe, boat, black map of kenya, and a black piece of plastic.", "3d white playground set with slides and swings, featuring a plane, building, and dragon in the sky."], "sample_ids": ["49c5a9d42ba64fb2b681ef583d700b98", "e694d53545d449319a64cceb0280c3c6"], "properties": ["a train, a ship, a boat", "3d, slide, swing"], "captions_pred_pc": ["above a black and white image of a long, curved line on a white background", "for a 3d model of the letter 'j'"], "captions_pred_image": ["a 3d model of a submarine", "a 3d model of a playground slide"], "question": "which entity has a plane", "label": 1}, {"captions": ["white s of a wall-mounted light, toilet with handle, faucet, and lamp with a light bulb.", " featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot."], "sample_ids": ["92052c493bf141a08b56f30f9c5e2d61", "34ebe81ae93841ca829efd15aee4d8c1"], "properties": ["light, toilet, faucet", "moss, mushroom, grass"], "captions_pred_pc": ["in 15 words or less a 3d illustration of an object made of dots on a white background 3d illustration of an object made of dots on a white background royalty free illustration", "for a black and white illustration of a cloud on a white background"], "captions_pred_image": ["a white plastic toilet paper holder on a gray background", "a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in 
the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor"], "question": "which entity is a natural scene?", "label": 1}, {"captions": [" of a wooden cabinet with drawers and filing features.", " a small white building with stairs and shelves."], "sample_ids": ["abbc90bbd5474f73b16482ccd10e07ec", "9e1f64d4fd514059be934077717536dc"], "properties": ["Cabinet, Drawers, Filing", "building, stairs, shelves"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white image of a person standing in front of a white background"], "captions_pred_image": ["a 3d rendering of a white wooden chest of drawers", "a white 3d model of a building with stairs"], "question": "which entity has more shelves", "label": 1}, {"captions": ["s of a plane, house, pile of rocks, and islands, accompanied by watercolor paintings of a tree, rocks, and a boat in a sand dune.", " a small wooden house with a green roof."], "sample_ids": ["8da8da6ccf5f4011a4115977c55d1cb8", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["s, plane, house", "roof, color, green"], "captions_pred_pc": ["a black and white silhouette of a map", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a black and white image of a small island in the middle of a body of water", "a 3d model of a house with a ladder"], "question": "which entity has a roof that is the color of green", "label": 1}, {"captions": [" a small island featuring a large building, trees, and a house on a hill, surrounded by a forest.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["d557c62e9be741a6b0f6b204d11a9c6f", "7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["house, hill, forest", "water, boat, rock"], "captions_pred_pc": ["above a black and white illustration of a small island in the middle of a body of water", "a black and white illustration of a surfboard"], "captions_pred_image": ["a black 
and white image of a small island in the middle of a body of water", "a 3d image of an animal laying on the ground"], "question": "which entity has a boat?", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " a small black house with a green roof, resembling a shed or container."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "bdb8e4c36ccb477890fd6ae569ae305c"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "black, roof, green"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "a black and white drawing of a square with dots all over it"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 
3d", "a 3d model of a small black building"], "question": "which entity is a building?", "label": 1}, {"captions": [" a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles.", " a small triangular-shaped object."], "sample_ids": ["bd7aab78974643f5a0660c699daf8eb3", "2d02985030804209a26c2c53b96a06f9"], "properties": ["roof, color, yellow", "shape, triangle, small"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white image of a piece of metal"], "captions_pred_image": ["a 3d model of a table and chairs on a white background", "a black piece of furniture on a white background"], "question": "which object is a triangle?", "label": 1}, {"captions": [" a silver spaceship-skull hybrid with blue lights, spheres, and jewels, featuring a robot with a head and blue eyes.", " a brick building with a roof structure and roof truss."], "sample_ids": ["ae7c805076b14abe83578069c7bf1c03", "84e8acad28664a738df69d719df9e263"], "properties": ["color, light, jewels", "roof, structure, truss"], "captions_pred_pc": ["in 15 words or less a black and white image of a snowflake on a white background royalty free illustration", "a black and white polka dots pattern on a white background polka dots pattern on a white background illustration"], "captions_pred_image": ["a 3d sculpture of a heart with various objects on top of it", "a 3d model of a brick building with a roof"], "question": "which entity has a roof structure", "label": 1}, {"captions": ["small airplane flying in the air.", " a white and gold mirror on a wooden easel stand."], "sample_ids": ["903dd2d9d2b7499580a0c1645df88a22", "0d10d734448d4a5d8d07b938c12d9d80"], "properties": ["airplane, fly, air", "color, white, gold"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a panda holding a light bulb", "for a black and white image of a shoe on a white background"], "captions_pred_image": ["a 3d model of an airplane 
royalty-free 3d model", "a 3d model of a standing mirror on a white background"], "question": "which object is white and gold?", "label": 1}, {"captions": [" a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["a452d5381dad4dc09f5ebe10635ae5fe", "06a1c233fb444830b577aa06e2c01294"], "properties": ["house, roof, green", "house, tree, hill"], "captions_pred_pc": ["above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of a building with a black roof", "a black and white image of a house in the middle of a field"], "question": "which house is on a hill", "label": 0}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue 
arrow, exosphere label, england label, blue and purple stripes, and a blue flag.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["color, temperature, england", "throne, stairs, tree"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of the earth with a rainbow in the sky", "a 3d model of a throne with a tree on it"], "question": "which entity has a throne", "label": 1}, {"captions": ["a 3d object featuring a fish with a long nose and open mouth, a cat head with red, yellow, and green stripes, a frog with a long snout, a white horse with green and yellow stripes, and a wolf mask.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["276699bb0f974c47b4e2954cfcd1651c", "06a1c233fb444830b577aa06e2c01294"], "properties": ["a, color, white", "house, tree, hill"], "captions_pred_pc": ["a black and white image of a skull in the shape of a butterfly", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of an animal with a long nose", "a black and white image of a house in the middle of a field"], "question": "which entity has a house on a hill?", "label": 1}, {"captions": [" of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse.", " a small white house with a roof."], "sample_ids": ["9b2c93d651c3409096118c5ce5b993f2", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["house, roof, blue", "roof, color, white"], "captions_pred_pc": ["a black and white illustration of a coffee mug on a white background royalty free illustration", "a black and white drawing of a 
house on a white background"], "captions_pred_image": ["a 3d model of a small house and barn", "a 3d model of a building with a white roof"], "question": "which house has a white roof", "label": 1}, {"captions": [" of a small island featuring a white lighthouse, a fountain, and a grassy crater.", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], "sample_ids": ["2a30e69498ff4fd1a33c1fb72286f553", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["lighthouse, fountain, crater", "building, plane, room"], "captions_pred_pc": ["a black beanie with sparkles on a white background", "a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of an object on top of a pedestal", "a 3d model of a box with a lot of items inside"], "question": "which entity has a room?", "label": 1}, {"captions": [" a house featuring a roof, floor plan, small bathroom, pool, and ceiling light fixtures.", " a mountain range in a mountainous area with a large green field and a small arrow."], "sample_ids": ["6c6549e2975a48a1b59ebe2a6562900e", "e80eca95874c4bffb8aec54044824d87"], "properties": ["floor plan, bathroom, pool", "arrow, mountain, area"], "captions_pred_pc": ["the floor plan of the house the floor plan of the house on a white background royalty free illustration", "a line with a square on it"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a mountain range royalty free 3d model no."], "question": "which entity has a mountain range?", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " a small house with a roof and ceiling-mounted air conditioner."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "6965067ea9e34357a7af21a2f078fbac"], "properties": 
["apse, roof, floor plan", "roof, air conditioner, house"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white illustration of a window"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d rendering of a small house with a covered porch"], "question": "which entity has a roof", "label": 1}, {"captions": [" a small house with a roof and door, resembling a shack or shed.", " of a house with a roof."], "sample_ids": ["f1b557775310478893242180defa4d80", "195ce38d57164eb588d19f8bd337f36e"], "properties": ["shack, roof, door", "roof, house, roof"], "captions_pred_pc": ["a black and white illustration of a telephone on a white background", "a black and white drawing of a toilet on a white background"], "captions_pred_image": ["a 3d model of a small house in the middle of a field", "a 3d model of a small house royalty free 3d model preview no 2"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a bench and lamp post.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["ac6e6b5ee63840a484b5ad2508675de1", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["A, bench, lamp post", "house, roof, blue"], "captions_pred_pc": ["a black and white image of a bench and a street lamp", "a black and white illustration of a coffee mug on a white background royalty free illustration"], "captions_pred_image": ["3d model of a street lamp and bench royalty free 3d model preview no.3", "a 3d model of a small house and barn"], "question": "which entity has a blue roof", "label": 1}, {"captions": ["a featuring a small boat, a rock with a hole, and blue water.", " a spider-like creature with long arms and legs."], "sample_ids": ["7ccdffc0d6404e8d9144260255ea0c5c", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["water, boat, rock", "arachnid, leg, arm"], "captions_pred_pc": ["a black and white illustration of a surfboard", "a black and white 
illustration of a spider on a white background"], "captions_pred_image": ["a 3d image of an animal laying on the ground", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["texture, spikes, eyes", "mountainous, landmass, state"], "captions_pred_pc": ["a black and white drawing of a flower", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 3", "a 3d model of a piece of paper"], "question": "which entity is a landmass", "label": 1}, {"captions": [" of a mushroom cloud with white tree and coral plant elements, featuring blue and green leaves.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["bca5233d878e4cf09b5bc2bb6f3915b0", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, shape, texture", "island, terrain, water"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a tree made out of white flowers", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", "a featuring a man, woman, robot, cat, and dog interacting in and around a house."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "92498f398e244020a867686729633610"], "properties": ["color, material, structure", "a, house, cat, dog"], "captions_pred_pc": ["a black and white drawing of a room with dots", 
"above a black and white illustration of a dog sitting on top of a pile of rocks"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d model of a person standing in front of a wall"], "question": "which entity is a house?", "label": 1}, {"captions": [" of a small white building or house with a white ceiling and kitchen hood.", " a snowy city with buildings and a plane flying overhead."], "sample_ids": ["17b23d23309d4385938ced3ca536a1d1", "cc63ceb2b5e84872a1a1f6423de419e2"], "properties": ["building, ceiling, kitchen", "building, plane, city"], "captions_pred_pc": ["above a black and white drawing of a bathroom with a toilet and sink", "a black and white photo of an airplane on a white background"], "captions_pred_image": ["a 3d model of a white building on a gray background", "a 3d model of a city in black and white"], "question": "which entity has a plane flying over it", "label": 1}, {"captions": [" of a round birthday cake with a single candle in the middle.", " of two rocks with ice elements."], "sample_ids": ["76c834f15f664dbdb7c08ca1ff936e7c", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["candle, color, shape", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white illustration of a shower head", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a birthday cake with a candle on top royalty free 3d model", "a 3d image of two rocks on a gray surface"], "question": "which object is more likely to be a rock", "label": 1}, {"captions": ["a featuring a pile of food, leaves, shredded paper, and rocks with scattered broken paper pieces.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["5206d4d96c2d428b9c1f7ee0e13bcffb", "7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["food, leaves, shredded paper, rocks", "water, boat, rock"], "captions_pred_pc": ["a black and white image of a bird in flight", "a black and white 
illustration of a surfboard"], "captions_pred_image": ["a 3d model of a mountain range on a white surface", "a 3d image of an animal laying on the ground"], "question": "which entity has a boat?", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["hat, sword, gun", "table, staircase, light"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d model of a table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["resembles, toy, bookshelf", "roof, color, yellow"], "captions_pred_pc": ["a black and white image of a book cover", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "a 3d model of a house with a roof"], "question": "which entity has a roof that is the color of yellow?", "label": 1}, {"captions": ["a white 3d printed mickey mouse dice with various numbers and symbols on it.", "a yellow and blue toy submarine."], "sample_ids": 
["e2645ac544844f3c981203134a99c30c", "da2719db8f4f4668af5b74c96e80c6cd"], "properties": ["- material is plastic- shape is dice- color is white", "color, yellow, blue"], "captions_pred_pc": ["a circle of dots with the number 2 in the center", "of a black and white image of a starfish on a white background"], "captions_pred_image": ["a 3d printed white dice with a mickey mouse face", "a 3d model of a submarine toy"], "question": "which toy is made of plastic", "label": 1}, {"captions": [" of a cracked, holey cheese-like stone with black markings and writing.", " of a small white building with stairs and a lid."], "sample_ids": ["0316725634c64f0b96ef60e7505c1b34", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["description, material, shape", "building, stairs, lid"], "captions_pred_pc": ["a black and white illustration of a sponge on a white background", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of an arrowhead on a white background royalty free 3d model preview no. 
3", "a 3d model of a white box on a gray background"], "question": "which entity is a building?", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["color, material, structure", "island, mountain, grass"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": [" a house with a purple roof and glass block structure.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["e8ac7de076e54f07ace1a0ead07f6f57", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["roof, color, purple", "house, tree, hill"], "captions_pred_pc": ["a black and white image of a fire hydrant on a white background fire hydrant on a white background royalty free illustration", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d model of a building with a glass roof", "a 3d model of a house in the middle of a field"], "question": "which house has a tree in front", "label": 1}, {"captions": ["3d lego model of the white house with a garden and tennis racket.", "3d white model featuring a boat, block, turbo text, puma logo, box, and shelf."], "sample_ids": ["349d70e3f3d24c72ad05d5ceeee291b6", "1fe7018cb01a4be58c0f211b8e2db93f"], "properties": ["a, model, white house", "3d, model, white"], "captions_pred_pc": ["a black and white photo of a person sitting on top of a pile of rocks", "of a black and white usb drive with the word 'turbo' on it"], "captions_pred_image": ["a 3d model of the white house 
royalty free 3d model", "a 3d printed object with the word turbo on it"], "question": "which model is white", "label": 1}, {"captions": [" a small house with a yellow roof and chimney.", " a large white and metal building with a metal roof structure."], "sample_ids": ["0056e85a243b47a08ddbcd36816cb6ae", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["roof, yellow, chimney", "roof, metal, white"], "captions_pred_pc": ["a black and white illustration of a house made up of dots on a white background a black and white illustration of a house made up of dots on a white background royalty free illustration", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a small house royalty-free 3d model preview no.2", "a 3d model of a large white box"], "question": "which building has a metal roof", "label": 1}, {"captions": ["s of a rock, boat, plane, and leaf on a stick.", " of a tree stump and rock with flowers on them."], "sample_ids": ["be0884a7ced34b3d92687b6087798a1e", "3f74af45aeeb43ee95e2c8a5e3afeae6"], "properties": ["s, stick, leaf", "flower, rock, tree stump"], "captions_pred_pc": ["above a black and white drawing of an object floating in the sky", "above a black and white drawing of a flower on a white background"], "captions_pred_image": ["a black and white photograph of a rock on a sandy surface", "a 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 
3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree st"], "question": "which entity has more flowers", "label": 1}, {"captions": [" of a white plastic bar, resembling a shelf or towel rod, with a metal light fixture.", " of two rocks with ice elements."], "sample_ids": ["f84dec547a3d4710b48db11fc9fa489a", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["light source, fixture, color", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white photo of a small square on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d rendering of a white object on a gray background", "a 3d image of two rocks on a gray surface"], "question": "which entity is a rock?", "label": 1}, {"captions": [" a large metal building with a roof and truss structure.", " a small wooden house with a green roof."], "sample_ids": ["b85a99699ccd4bcba213322113bb253d", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["roof, truss, structure", "roof, color, green"], "captions_pred_pc": ["of a metal grate on a white background", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a long metal fence", "a 3d model of a house with a ladder"], "question": "which roof is green", "label": 1}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue arrow, exosphere label, england label, blue and purple stripes, and a blue flag.", " of a broken stone wall featuring an angel sculpture."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "aae2c42740a04fd68068f5707111d26f"], "properties": ["color, temperature, england", "image is a sculpture of an angel on a wall"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "a black and white image of a toilet paper roll"], "captions_pred_image": ["a 3d model of the earth 
with a rainbow in the sky", "a 3d model of a marble sculpture of an angel"], "question": "which entity is a sculpture?", "label": 1}, {"captions": ["a featuring a small desk and chair, table, staircase, bathroom with sink, and square ceiling light in a small house setting.", " a furnished room featuring a table, chairs, desk, shelves, and a small kitchen and living area."], "sample_ids": ["1be04fc7b47c47e9aaa9d2720af16b87", "9403cf50e8cb44c195b76afd89d0c9fb"], "properties": ["desk, chair, table", "furniture, room, kitchen"], "captions_pred_pc": ["for a black and white image of a letter 'f'", "a black and white image of a room with dots all over it"], "captions_pred_image": ["a 3d model of a staircase in a room", "a 3d rendering of a room with a table, chairs, and shelves"], "question": "which entity has a kitchen?", "label": 1}, {"captions": ["a pile of mint green soap cubes.", " tall grass, plants, rocks, and a tree."], "sample_ids": ["edd35e0657b640b1b8fcf86942e1a9e5", "eefed882ed5f4711bc5a76332d9712f3"], "properties": ["color, mint, soap", "grass, plants, rocks"], "captions_pred_pc": ["a black and white illustration of a group of dots on a white background", "a black and white drawing of a tree"], "captions_pred_image": ["a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of 
white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background", "a 3d model of a group of trees"], "question": "which entity has more plants", "label": 1}, {"captions": ["a 3d object featuring a flat roof with blue and white pattern, a purple and white bench, a blue and purple striped runner, a bed with blue and purple stripes, blue and white striped paper, a bench with a blue and purple pattern, and a ceiling light fixture with wooden slats.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["28d0cf71ed684dcb87b2c9b2744c3633", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["runner, bed, bench", "a, material, clay"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a creative black and white drawing of a snowflake on a white background royalty free illustration", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a long, curved structure", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" an ice cream machine, popsicle with two sticks, and a shelf with a computer monitor.", " of a wooden pirate chest with a metal roof."], "sample_ids": ["cb840159fea7436d81eb33bdccad3596", "5720efe7e68e434fa244d4978e623114"], "properties": ["A, a, a", "metal, roof, chest"], "captions_pred_pc": ["a black and white illustration of a bench", "in 15 words or less a black and white drawing of dots"], "captions_pred_image": ["a 3d rendering of a white and gray wall mounted shelf", "a 3d model of a wooden chest"], "question": "which chest has a metal roof", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog."], "sample_ids": 
["ec5914b53b6a4cde8de4820050bc46c5", "f6c6e7a65a3e42dfa431b1d984c72f28"], "properties": ["resembles, octopus, squid, spider, robot", "house, fence, dog"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "above a black and white drawing of a bathroom"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a"], "question": "which entity has a fence", "label": 1}, {"captions": [" a large house with a roof on a platform.", " of two rocks with ice elements."], "sample_ids": ["cb3e09a301b746918a682a595037c7f7", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["roof, platform, house", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white image of a piece of paper", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a small house", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": ["a black table lamp with a black shade and a hat on top.", "a black and gold cylindrical object with 
a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["31c00c8337de4854a20299d719136cce", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["color, black, shade, black", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a circular object on a white background royalty free illustration", "a black and white image of a circular object on a white background"], "captions_pred_image": ["a 3d model of a table lamp royalty free 3d model preview no.2", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a white spiral, earbuds, ceiling light, and silver ring with black and white scissors and design.", " a wooden roof structure with a pink roof."], "sample_ids": ["c69f60b389124ad9b4f81c64ec332054", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["earbuds, light, ring", "roof, color, pink"], "captions_pred_pc": ["a black and white drawing of a needle and thread", "above a black and white image of a metal grate"], "captions_pred_image": ["a black and white illustration of a pair of sunglasses and a pair of scissors next to each other on a white background", "a 3d model of the roof of a building"], "question": "which entity has a roof", "label": 1}, {"captions": ["a 3d printed model of a small white house.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["d62a9376f8be4c7585d15ccf68c51239", "b896a0898efe4059a776193c02132129"], "properties": ["size, material, color", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white image of a building with a lot of dots", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a small white birdhouse", "a 3d model of an ancient statue"], "question": "which object is made of 
stone", "label": 1}, {"captions": [" a small wooden house with a green roof.", " a small house on an island with a boat and a bird on a rock."], "sample_ids": ["912e0b8da1e8496489833d8a8ecffd31", "1e56a92a0ddc41e59694bd1ad1656149"], "properties": ["roof, color, green", "house, rock, bird"], "captions_pred_pc": ["a black and white map of hawaii on a white background", "a black and white drawing of a boat in the middle of a body of water"], "captions_pred_image": ["a 3d model of a house with a ladder", "a 3d rendering of a house on a rock"], "question": "which house has a bird on a rock?", "label": 1}, {"captions": ["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["db19e46828c94b1a8b9a6ca9f673c604", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["a, roof, soup", "island, terrain, water"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a bowl and chopsticks on a sheet of paper", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": ["a featuring a small island with a mountain, a lake in the middle, and a bird flying above.", " a small house with a roof and ceiling-mounted air conditioner."], "sample_ids": ["e51fead89ae64968b2ca7f4ccb6d3b97", "6965067ea9e34357a7af21a2f078fbac"], "properties": ["a, bird, lake", "roof, air conditioner, house"], "captions_pred_pc": ["a black and white photograph of a cloudy sky", "a black and white illustration of a window"], "captions_pred_image": ["a 3d model of a mountain range in black and white", "a 3d rendering of a small house with a covered porch"], "question": "which entity is a building?", "label": 1}, {"captions": [" of a small red renault clio car.", " a 
spider-like creature with long arms and legs."], "sample_ids": ["e4f9463f6e004b90bb977d12f6375b9c", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["color is red, make is renault, model is clio", "arachnid, leg, arm"], "captions_pred_pc": ["a close-up view of the back of a cell phone on a white background", "a black and white illustration of a spider on a white background"], "captions_pred_image": ["a 3d model of a car royalty free 3d model preview no.2", "a black and white image of an alien creature"], "question": "which is not a arachnid", "label": 1}, {"captions": ["a small white wooden castle with flags, turrets, and two towers.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], "sample_ids": ["345b1c7d2d9b4524aa0dcdc3cd27e4da", "d81d13362ae04371bb2cba46e4939665"], "properties": ["turrets, flags, towers", "hat, bow, arrow"], "captions_pred_pc": ["a black and white illustration of a square made up of many small dots on a white background", "above a black and white photo of an ice sculpture"], "captions_pred_image": ["a 3d model of a castle with towers and a drawbridge royalty-free 3d model preview no.1", "a sculpture of an african man sitting on a pedestal"], "question": "which object has a hat?", "label": 1}, {"captions": [" a red and blue metal-framed building structure with a steel house frame.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["a453ac83ef1e4f76bd7978451888d5f5", "6b745457e06840119058883b35f78f58"], "properties": ["color, material, frame", "roof, color, blue"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a metal frame structure", "a 3d model of a house with a steeple on top"], "question": "which building is made of wood", "label": 1}, {"captions": [" a modern office building with a green door, green roof, windows, and blue lights.", " a small 
white building with a door, resembling a box-shaped house."], "sample_ids": ["d6087023095446fbadef1721478373b2", "1b5fe88d0ff149ae9d8b4eb455c5c90c"], "properties": ["door, roof, window", "shape is box, color is white, door is present"], "captions_pred_pc": ["a black and white drawing of a toilet brush", "a black and white image of a person standing in front of a white background"], "captions_pred_image": ["a 3d model of an apartment building", "a 3d model of a white, open shelving unit"], "question": "which building has a door", "label": 1}, {"captions": [" a house with a roof structure, featuring a brick wall and suspended box.", " a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond."], "sample_ids": ["1a7bfcf3755142bab90d3d7cb02d0f2c", "a452d5381dad4dc09f5ebe10635ae5fe"], "properties": ["roof, structure, wall", "house, roof, green"], "captions_pred_pc": ["a black and white illustration of a group of dots on a white background", "above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black 
and white image of a black and white image of a black and white image of a black and white image of a black and white"], "captions_pred_image": ["a 3d model of a building with a roof", "a 3d model of a building with a black roof"], "question": "which house has a green roof", "label": 1}, {"captions": [" a bottle with a skull and crossbones label, containing yellow liquid, resembling poison or medicine.", "star wars stormtrooper "], "sample_ids": ["135384c5b3ee4d0aaf3b2abaea060a67", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["label, color, shape", "a, color, white"], "captions_pred_pc": ["of a black and white photo of a circular object on a white background", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a bottle with a skull and crossbones label on it", "a 3d model of a star wars stormtrooper"], "question": "which entity is white", "label": 1}, {"captions": ["a featuring a robot, a black and white box, a man with a suitcase, a door, a large white building, and a white shelf.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["4be12bf79ead47a68cd67fc01a6e5c8c", "a17477b445b3443189dad22f768b888b"], "properties": ["a, door, suitcase", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white drawing of a dog in a cage", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a robot standing in front of an open door", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": [" a wooden cube.", " a white plastic container with a lid, a small box, a cup, a bottle, and a jar."], "sample_ids": ["cf3ae5b8dad64e338c835d22947ce796", "20a02705a66f460492e07345e84a62ed"], "properties": ["shape is cube, material is wood, color is brown", "a box, a cup, a bottle, a jar"], "captions_pred_pc": ["in 15 words or less a black and white pattern of dots on a white background a black and white pattern of dots on a white background 
royalty free illustration", "a black and white pattern of dots on a white background"], "captions_pred_image": ["a 3d model of a marble cube on a white background royalty free 3d model preview no.3", "a 3d model of a plastic bottle, a plastic cap, and a plastic container"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a yellow and white structure with yellow poles.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["bada91e216fd486d9e3e356d48978f33", "b896a0898efe4059a776193c02132129"], "properties": ["color, shape, poles", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white drawing of a dotted pattern on a white background a black and white drawing of a dotted pattern on a white background royalty free illustration", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": ["a featuring a plane, a small plane, a bird, and a dragonfly all flying in the air.", "a featuring a chair, a building, a glass tower, and a throne made of money, with various unique elements such as a green roof and a computer chip."], "sample_ids": ["f39783d05dec49e49482c407d656e0f7", "18d2e75f23474d7489a6d7d605dfc76d"], "properties": ["a, plane, small plane", "throne, chair, building"], "captions_pred_pc": ["of a black and white photo of an airplane propeller", "a black and white illustration of a person sitting on a bench"], "captions_pred_image": ["a 3d printed model of a fighter plane in the air", "a 3d model of a building on top of a table"], "question": "which entity has a throne made of money?", "label": 1}, {"captions": [" a cityscape featuring various buildings, trees, an airport runway, and a plane.", " a stone wall featuring carvings and statues."], "sample_ids": 
["9b45036d3f0342a78db9a25938dc77fc", "42f663140f834d1ab5f95cd8a5ad04b3"], "properties": ["building, tree, plane", "carving, statue, wall"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake in the shape of a snowflake royalty free illustration", "a black and white image of a snowflake on a white background"], "captions_pred_image": ["a black and white image of an industrial area with buildings and trucks in the foreground", "a 3d image of a group of statues on a wall"], "question": "which entity has fewer buildings", "label": 1}, {"captions": [" a small building with windows and a roof.", " a building with a roof structure, featuring a wooden truss and ceiling with a light."], "sample_ids": ["0ef2cac27e364c0687afae7ab5040cc3", "cb42ecb7a3fd4eba99f166150ecbc9a7"], "properties": ["roof, windows, building", "roof structure, truss, ceiling"], "captions_pred_pc": ["a black and white square made up of small dots on a white background", "a black and white image of a stainless steel sculpture"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no 3", "a 3d model of a barn royalty free 3d model preview no 2"], "question": "which building has a roof structure", "label": 1}, {"captions": [" a small building with a roof.", " a small house with a roof."], "sample_ids": ["a3089017e4c5463d852de65abf61eda5", "0d2246e433ce4066b76489f17ba8d694"], "properties": ["roof, building, small", "roof, house, small"], "captions_pred_pc": ["in 15 words or less a black and white image of a building with dots all over it", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a black and white image of a small house", "a 3d model of a house with a triangular roof"], "question": "which entity has a roof", "label": 1}, {"captions": ["an orange of a forklift truck in a parking lot.", " a floating small island with trees, grass, and a mountain."], "sample_ids": 
["32d757fbd29640ffb5aab34925525a29", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["orange, forklift, parking lot", "island, mountain, grass"], "captions_pred_pc": ["a black and white illustration of a cell phone", "a black and white map of the island of malta"], "captions_pred_image": ["a black and white photograph of a forklift", "a 3d image of a small island in the middle of a lake"], "question": "which has more grass", "label": 1}, {"captions": ["white and red mouse with a red tail and cord.", " a molecule featuring green, red, and blue spheres."], "sample_ids": ["2613ec608c454ee5b0f2d5efccd78766", "1c0e821eb7c4489dbff9e20d7e8575a3"], "properties": ["color, tail, cord", "color, sphere, molecule"], "captions_pred_pc": ["of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black and white photograph of a black 
and white photograph of a black and white photograph of a black", "a black and white photograph of a group of geometric shapes arranged in the shape of a diamond"], "captions_pred_image": ["a 3d mouse on a gray background royalty free 3d model preview", "a 3d model of a molecule in the shape of a pyramid"], "question": "which entity is a molecule?", "label": 1}, {"captions": [" a stack of books, a pile of paper, and a lamp with a black and white shade.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["6a06b505bcb34026a07ac15931f9f6f3", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["books, paper, lamp", "house, tree, hill"], "captions_pred_pc": ["a black and white silhouette of a map of the state of new york", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a black and white photograph of a toilet paper holder", "a 3d model of a house in the middle of a field"], "question": "which entity is situated on a hill", "label": 1}, {"captions": [" of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["f178fb523ad7421aaa90a92ee736ee00", "06a1c233fb444830b577aa06e2c01294"], "properties": ["bedroom, bathroom, bed", "house, tree, hill"], "captions_pred_pc": ["a black and white drawing of a room with dots", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of a small room with a bed, desk, and chair", "a black and white image of a house in the middle of a field"], "question": "which entity has a house", "label": 1}, {"captions": [" a small house with a pond and situated on a rock.", " a city with buildings, houses, trees, and grass."], "sample_ids": ["92859eb82a344134806b37cc209927c6", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["house, rock, pond", "buildings, houses, grass"], 
"captions_pred_pc": ["in 15 words or less a black and white drawing of a toaster", "in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "an image of a pile of trash on the ground"], "question": "which entity has more grass", "label": 1}, {"captions": [" a small house on an island with trees, shrubs, a pool, and a lake.", "a featuring a building, a coin, a small black box, and a ball."], "sample_ids": ["c8331489fca44685bedfa1bdadf6ccb3", "949cf1a57aea45d18261e980b21b8c35"], "properties": ["house, lake, pool", "a, building, coin, ball"], "captions_pred_pc": ["a black and white image of a pattern on a piece of paper", "a black and white illustration of a triangular shaped object"], "captions_pred_image": ["a 3d model of a large building", "a 3d model of a box with a coin next to it"], "question": "which entity has a coin?", "label": 1}, {"captions": [" of a wooden nightstand with a hexagonal pattern, white top, and drawer.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["e6f2dbec6d464b4da4aa47bce242f6e5", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["Drawer, Pattern, White", "hat, skull, bread"], "captions_pred_pc": ["a cross made of dots on a white background vector illustration of a cross made of dots on a white background royalty free illustration illustration of a cross made of dots on a white background vector illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration 
of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d rendering of a wooden box with a handle", "a black and white image of a stone sculpture"], "question": "which entity has a skull?", "label": 1}, {"captions": [" a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["bd7aab78974643f5a0660c699daf8eb3", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["roof, color, yellow", "roof, structure, greenhouse"], "captions_pred_pc": ["a black and white drawing of a room", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a table and chairs on a white background", "a 3d model of a building with a roof"], "question": "which roof is not a greenhouse", "label": 1}, {"captions": [" a white hand, ear bud, teddy bear, earpiece, and utensil.", " a small white table with stairs and a ladder, featuring a black and white kitchen hood and a black square ceiling light."], "sample_ids": ["1c59287d496f4da6b245a01d25a7e2a4", "5565c16f297e405f9d5dbf0ebb623605"], "properties": ["earbud, earpiece, earpiece", "table, stairs, ladder"], "captions_pred_pc": ["a black and white drawing of a hand holding a cup", "above a black and white photograph of a small square in the center of the image"], "captions_pred_image": ["a white object on a 
grey background", "a 3d model of a table with a stool on top"], "question": "which entity has a table", "label": 1}, {"captions": [" a small white building featuring a green drawer, white curved wall, kitchen sink, and windows, resembling a floor plan of a gym.", " a spider-like creature with long arms and legs."], "sample_ids": ["b494049bb15440949e465d54a72b2f02", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["building, floorplan, gym", "arachnid, leg, arm"], "captions_pred_pc": ["above a black and white drawing of a floor plan", "a black and white illustration of a spider on a white background"], "captions_pred_image": ["a 3d model of a white building with two floors", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": [" of a wooden staircase with marble floor and wooden railings in a house.", " of a wildebeest with horns, royalty-free."], "sample_ids": ["ee70964ce5e841bd87381cff40d59b88", "f01175538f7b4ffe8e2648d772e94c5c"], "properties": ["floor, staircase, railings", "image size, image, royalty"], "captions_pred_pc": ["a black and white drawing of a light switch", "a black and white image of a dog jumping in the air"], "captions_pred_image": ["a 3d model of a staircase on a marble surface", "a 3d model of a wildebeest"], "question": "which image is smaller", "label": 1}, {"captions": ["a 3d white cube featuring various text and logos, including \"gypsy stribes,\" \"guinea pig studios,\" \"guillaume pi sodds,\" \"happy studios,\" and \"guillem pie sos.\"", " a wooden billiard table with legs."], "sample_ids": ["5d08c34bfb2c4c9b9538e24d68761331", "b20ad62516fa467ba6e8de063998e8e4"], "properties": ["- material is plastic- color is white- shape is cube", "legs, material, wood"], "captions_pred_pc": ["of a black and white photo of a person sitting on a bench", "a black and white drawing of a rectangular shaped object"], "captions_pred_image": ["a 3d image of a cube with the word 'stories' 
written on it", "a black and white image of a pool table"], "question": "which object is made of wood", "label": 1}, {"captions": [" a house with a green roof and lawn.", "a 3d object featuring a rock, shell, piece of paper, and cat."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "53efab50e5a74e5ea165c763cea15be4"], "properties": ["roof, green, lawn", "a, rock, paper"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "for a flock of birds in the sky"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a black and white image of a piece of paper in the shape of an island"], "question": "which object has a shell?", "label": 1}, {"captions": ["a sword with a yellow handle and white blade.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["ee82fbc66d39467b8f34b91493053e66", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["color, handle, blade", "camera, speaker, ceiling fan"], "captions_pred_pc": ["of a black and white image of a pen on a white background", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a sword on a gray background", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which object has a speaker?", "label": 1}, {"captions": [" a table with two bowls and a cup on it.", " a small house with a yellow roof and chimney."], "sample_ids": ["41842c8d2ebd402da04def3c53c41633", "0056e85a243b47a08ddbcd36816cb6ae"], "properties": ["a, bowl, cup", "roof, yellow, chimney"], "captions_pred_pc": ["a black and white image of two eyes in the sky", "a black and white illustration of a house made up of dots on a white background a black and white illustration of a house made up of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a table on a marble floor royalty free 3d model 
preview no. 3", "a 3d model of a small house royalty-free 3d model preview no.2"], "question": "which object has a chimney?", "label": 1}, {"captions": [" of a deer and a crystal tree on an island, with a rock and bird nearby.", " of two rocks with ice elements."], "sample_ids": ["2ce5ec0844114576a247b1de2011e0c7", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["image is a deer and a crystal tree on an island, with a rock and bird nearby", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white illustration of a flying saucer", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a deer standing on an icy island", "a 3d image of two rocks on a gray surface"], "question": "which entity has a rock with ice elements?", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", "a white of a woman with her arms outstretched."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["apse, roof, floor plan", "image, color, white"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a woman with her arms outstretched"], "question": "which entity is a photograph?", "label": 1}, {"captions": ["a 3d object featuring a white door, box, wall with hooks, plastic bucket, sheet of paper, curved wall, and paper with a hole.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["cbbcf78010e34fa9b2e963452d081eb7", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["a, box, wall", "house, pool, balcony"], "captions_pred_pc": ["a black and white illustration of a flower", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d rendering of a white wall with two 
hooks hanging from it", "a 3d model of a modern house"], "question": "which object has a pool", "label": 1}, {"captions": ["a grayscale of a mountain.", " a house with a roof, wooden beams, and chimney."], "sample_ids": ["17a86c6c9be0478c984ef36c7c22edea", "be1376023c274bdda995d54f3694157f"], "properties": ["color, grayscale, mountain", "roof, beams, chimney"], "captions_pred_pc": ["above a black and white photograph of a galaxy", "a black and white drawing of a bathroom with a shower"], "captions_pred_image": ["a 3d model of a mountain range on a white background royalty-free 3d model", "a 3d model of a house with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", " a tan, cylindrical hat with a hole in it."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "00c350e9f5f24fcd8bcc378da2963d4c"], "properties": ["hat, sword, gun", "hat, color, tan"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "a black and white drawing of a mushroom on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d model of an object with a hole in it"], "question": "which hat is tan", "label": 1}, {"captions": [" a small white box with a shelf and a hole in it.", " of a stone wall with a window and multiple stone arches."], "sample_ids": ["d023ae78bc5a436eaba13c5ecdd45c56", "db74ee1621464be1b164be26a1af050e"], "properties": ["a, hole, shelf", "window, arches, wall"], "captions_pred_pc": ["a black and white drawing of a dotted square on a white background", "a black and white illustration of a bolt and nut on a white background a black and white illustration of a bolt and nut on a white background royalty free illustration"], "captions_pred_image": ["a 3d 
model of a white box on a gray background", "a 3d model of an old brick wall"], "question": "which entity has a window", "label": 1}, {"captions": [" a small yellow table with a staircase and a square ceiling light fixture.", " a small yellow table with a staircase and a square ceiling light fixture."], "sample_ids": ["36f4d2cbd02345c6a77f7345ebde841c", "36f4d2cbd02345c6a77f7345ebde841c"], "properties": ["table, staircase, light", "table, staircase, light"], "captions_pred_pc": ["a black and white photo of the letter g", "a black and white photo of the letter g"], "captions_pred_image": ["a 3d model of a table with stairs", "a 3d model of a table with stairs"], "question": "which entity has a staircase?", "label": 0}, {"captions": [" a two-story building with stairs and a three-tier display.", " a large building with a roof and windows."], "sample_ids": ["1d817b7b6ded439d8b92eeab87e4cf8d", "32d1fbd3ee91426882290305f70021e6"], "properties": ["tiers, building, stairs", "roof, windows, building"], "captions_pred_pc": ["for a black and white photo of a person sitting on a bench", "of a black and white photo of a diamond buckle"], "captions_pred_image": ["a 3d model of a building with two floors and a spiral staircase", "a 3d model of an apartment building royalty free 3d model preview no.2"], "question": "which building has a roof?", "label": 1}, {"captions": ["a 3d low-poly model of a gun with blue lights.", " a white castle composed of small cubes."], "sample_ids": ["58cc3b1c08da4081a7ffdb5a00db6473", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["3d model, color, lights", "composed of, white, cubes"], "captions_pred_pc": ["above a black and white image of an object on a white background", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a futuristic weapon", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of small cubes", "label": 1}, {"captions": [" a blue character with arms 
outstretched.", " a cartoon hippo standing on its hind legs with arms outstretched."], "sample_ids": ["e365358ce93a4ba2b1b9a48537b85477", "f3edc84dc71649c1a11270437279255b"], "properties": ["color, shape, size", "hippo, hind, legs"], "captions_pred_pc": ["in 15 words or less a black and white image of a hexagonal ring", "above a black and white illustration of a frog"], "captions_pred_image": ["a 3d model of a person standing with their back to the camera", "a 3d model of a hippopotamus standing on its hind legs"], "question": "which entity is a cartoon?", "label": 1}, {"captions": [" of a wooden cabinet, shelf, and small house in a furnished room.", "two white spheres in a ."], "sample_ids": ["854757ca755240f8b04576d899349151", "1c02212a35134545ab63ab180d629c31"], "properties": ["cabinet, room, shelf", "two, spheres, white, in, a"], "captions_pred_pc": ["a black and white image of a toothbrush in the shape of the letter 'l' 3d illustration on a white background royalty free illustration", "a black and white illustration of two spheres"], "captions_pred_image": ["a 3d model of a living room with a couch, coffee table, and television", "a 3d model of a white ball on a gray background"], "question": "which object is in a room?", "label": 0}, {"captions": ["white 3d tank model", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["31b2f632bf5e4128a5f59a7c9ddad62f", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["color is white, model is 3d, name is tank", "house, fence, playground"], "captions_pred_pc": ["a close-up view of the back of the phone case", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a white 3d model of a tank on a gray background", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" a small white archway structure resembling a building.", " a large, black and 
white circular building, resembling a stadium or ring structure."], "sample_ids": ["5ad02458cf394134a902e25001d2ffef", "67f46bb0048244c687a58d1017a08f6b"], "properties": ["structure, building, archway", "building, color, black and white"], "captions_pred_pc": ["for a black and white illustration of a castle on a hill", "the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustr"], "captions_pred_image": ["a 3d rendering of a white object on a white surface", "a 3d model of a circular fence with black and white stripes"], "question": "which building is black and white", "label": 1}, {"captions": [" a woman with long wings, legs, hair, and spikes, accompanied by a spider with long legs.", " a house with 
a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["68cf560d0c424ec6a3c58e1b9967508d", "c8936ace72954650b4e2d84246964849"], "properties": ["hair, legs, wings", "roof, color, pink"], "captions_pred_pc": ["of a white and black bird flying in front of a white background", "a black and white drawing of a toilet"], "captions_pred_image": ["a 3d model of a woman with a large spider-like creature on her back", "a 3d model of a house with a roof"], "question": "which entity has a roof that is the color pink", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["color, door, window", "house, pool, balcony"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "a 3d model of a modern house"], "question": "which house has a pool", "label": 1}, {"captions": [" of a small wooden house with two roofs.", " a house with roof trusses and wooden beams on a suspended ceiling."], "sample_ids": ["30fc23ae4edb42609e30e029dede54bd", "3c2e3a3670b042069bd8290e2c357702"], "properties": ["house, roof, wooden", "roof trusses, beams, suspended ceiling"], "captions_pred_pc": ["of a pair of stainless steel screws on a white background", "above a black and white drawing of a building"], "captions_pred_image": ["a 3d model of a small barn", "a 3d model of a house with a roof in progress royalty free 3d model preview no. 
1"], "question": "which house has a suspended ceiling", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", " of a person breaking through a brick wall."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "0708be2186d24d52815e8ac9d751fc37"], "properties": ["a knife, blade, handle", "image, brick, wall"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "for a black and white illustration of an ice cream cone"], "captions_pred_image": ["a piece of white plastic on a gray background", "a 3d model of a person breaking through a brick wall"], "question": "which image shows a person breaking through a brick wall?", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " of a bearded man wearing a green shirt and a hat."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "1e4e5e8133ae48c797facaec724c13a5"], "properties": ["a box, a cup, a bottle, a jar", "hat, shirt, bearded"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "of a black and white bracelet on a white background"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d model of a man with a beard"], "question": "which entity has a hat", "label": 1}, {"captions": [" a small house with a red roof.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["085db9059b744673b5623b5338e02196", "bf18bfd89efd43389781050230467d58"], "properties": ["roof, red, house", "Lights, number, five"], "captions_pred_pc": ["a black and white dotted square on a white background", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small shed in the snow", "a white chandelier with five white shades"], "question": "which entity has fewer lights", "label": 1}, {"captions": [" a colorful building with red and 
blue blocks, a yellow roof, and hanging from the ceiling.", " a large house with a roof on a platform."], "sample_ids": ["2a31c42de6f74ddba6b19b3467066e11", "cb3e09a301b746918a682a595037c7f7"], "properties": ["color, roof, block", "roof, platform, house"], "captions_pred_pc": ["a black and white drawing of a room with a lot of dots", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d model of a small house"], "question": "which entity has a roof on a platform?", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", " a wooden roof structure with a pink roof."], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "b70565bda31d42958d3597bf6067ddd2"], "properties": ["color, door, window", "roof, color, pink"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "above a black and white image of a metal grate"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "a 3d model of the roof of a building"], "question": "which structure has a pink roof", "label": 1}, {"captions": ["a white of a gun.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["d7c12235efd1471db5b7145b63dbd11a", "b896a0898efe4059a776193c02132129"], "properties": ["color, white, gun", "- material is stone, metal, concrete"], "captions_pred_pc": ["of a white object on a white background", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a white 3d model of a rifle on a gray background", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": ["a small clay jug with a face, handle, and spout, depicted as a .", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["ceee98c20f23424195da092156905ec4", 
"6b745457e06840119058883b35f78f58"], "properties": ["face, handle, spout", "roof, color, blue"], "captions_pred_pc": ["a black and white image of a small, circular object", "a black and white image of a building with dots"], "captions_pred_image": ["a white ceramic vase with a face on it", "a 3d model of a house with a steeple on top"], "question": "which entity has a roof", "label": 1}, {"captions": [" a small white house with stairs and a wall-mounted shelf.", "a featuring white and red cubes, and a pink and white chair."], "sample_ids": ["10c4ba5b0db4490db9c00c21c94cb41f", "f2c44a82ba744ba8b93e9a1c2272c117"], "properties": ["house, color, white", "color, white, red, pink"], "captions_pred_pc": ["above a black and white drawing of a bench", "a black and white illustration of a house made of dots"], "captions_pred_image": ["a 3d model of a small white building", "a 3d model of a white structure with stairs"], "question": "which entity is a chair?", "label": 1}, {"captions": ["a featuring a small desk and chair, table, staircase, bathroom with sink, and square ceiling light in a small house setting.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["1be04fc7b47c47e9aaa9d2720af16b87", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["desk, chair, table", "Wheels, laptop, robot"], "captions_pred_pc": ["for a black and white image of a letter 'f'", "a black and white drawing of a cell phone"], "captions_pred_image": ["a 3d model of a staircase in a room", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["roof, color, blue", "- material is wood, rusty, horned"], "captions_pred_pc": ["of a white 
lace clutch purse on a white background", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building with many windows", "3d model of a plague doctor's mask"], "question": "which object is made of wood", "label": 1}, {"captions": [" a yellow monster-sphere with teeth.", " a small white house with a roof."], "sample_ids": ["fc5f906bab2c4c36a406ebdc15f58541", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["color, shape, texture", "roof, color, white"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background dandelion illustration on a white background illustration", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a white ball with teeth", "a 3d model of a building with a white roof"], "question": "which entity has a roof that is white", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "a17477b445b3443189dad22f768b888b"], "properties": ["ceiling, light, desks", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": [" a vibrant city skyline featuring various colored buildings, trees, and skyscrapers.", " a building with yellow lines."], "sample_ids": ["1a1fb9b0d83845f6b1238fb45e0defff", "f18e34286cf54876874f55ecc9018492"], "properties": ["color, skyline, buildings", "color, yellow, lines"], "captions_pred_pc": ["a black and white illustration of a city 
skyline", "a black and white drawing of a map"], "captions_pred_image": ["a black and white 3d model of a city skyline", "a drawing of an airplane flying over a city"], "question": "which building has lines that are yellow?", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", "a white of a spaceship and building."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "bf7d4277c9184d35abdec85bd5e25956"], "properties": ["yellow, table, roof", "image, building, spaceship"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a white object on a gray background"], "question": "which image shows a spaceship and building?", "label": 1}, {"captions": [" a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light.", " a small white table with stairs and a ladder, featuring a black and white kitchen hood and a black square ceiling light."], "sample_ids": ["5ea0962b100b4fccb761ed84afe027b5", "5565c16f297e405f9d5dbf0ebb623605"], "properties": ["house, table, chair", "table, stairs, ladder"], "captions_pred_pc": ["above a black and white photograph of an open door", "above a black and white photograph of a small square in the center of the image"], "captions_pred_image": ["a 3d rendering of a small white table with a chair", "a 3d model of a table with a stool on top"], "question": "which entity has stairs", "label": 1}, {"captions": [" of a small blue and green gazebo with a table and chairs.", " a white motorcycle with wings."], "sample_ids": ["0a3d553ed5d54c9794494af4f7a7e1c6", "7e684a7c012c4fd0ac91844f22457640"], "properties": ["color, gazebo, table", "color, white, wings"], "captions_pred_pc": ["a black and white illustration of 
a gazebo in the middle of a field of polka dots stock photography \u00a9 2018 iStock", "a black and white image of a pair of sunglasses"], "captions_pred_image": ["a 3d model of a small gazebo with a fountain in the center", "a 3d model of a motorcycle on a white background"], "question": "which entity is white", "label": 1}, {"captions": [" a small island with trees, water, and a river.", " a white rocking chair with a curved backrest."], "sample_ids": ["5a9c593092c04deaa0f17a1c28a79476", "ee0deb90abf943b6894cd5ded1331213"], "properties": ["water, river, island", "backrest, curved, yes"], "captions_pred_pc": ["in 15 words or less a black and white polka dot pattern on a white background", "a black and white illustration of a spiral pattern on a white background a black and white illustration of a spiral pattern on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a snowy landscape with trees in the foreground royalty free 3d model preview no.2", "a 3d model of a white chair royalty free 3d model no. 
3"], "question": "which entity has a curved backrest", "label": 1}, {"captions": [" featuring a chair, table, and refrigerator.", " a small white house with stairs and a spiral staircase, featuring a white table and ceiling light."], "sample_ids": ["cd967e38e9364ed28c4090e905740c9d", "e9e1cc7fae22458197a61f43a9c355f4"], "properties": ["chair, table, refrigerator", "house, staircase, table"], "captions_pred_pc": ["a 3d rendering of a white surface with black dots on it", "above a black and white photograph of a dog in a frame"], "captions_pred_image": ["a 3d model of a chair, a table, and a refrigerator", "a 3d model of a small house with a spiral staircase"], "question": "which entity has a table", "label": 1}, {"captions": ["a low poly of a tree with red apples and a green cactus with red dots.", " a small house with a tree, pool, and pond in a green and blue landscape."], "sample_ids": ["fd7765e391cd49ccbc72891d90850cdb", "e22489ba182f45cb81f0a83f22abe9bd"], "properties": ["color, red, apples, tree, cactus, dots", "house, tree, pool"], "captions_pred_pc": ["a black and white illustration of a snowflake on a white background", "in 15 words or less a black and white image of a square with dots around it"], "captions_pred_image": ["a 3d model of a group of geometric shapes", "a 3d model of a house and a tree in a box royalty free 3d model preview no.3"], "question": "which entity has a pool?", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", " of an orange life ring"], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "e27df4cbd35c4666b4690b58e1a3dad6"], "properties": ["resembles, toy, bookshelf", "color is orange, material is plastic, shape is ring"], "captions_pred_pc": ["a black and white image of a book cover", "of a silver bracelet with black dots on it"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "an 
inflatable life preserver on a white background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" of a wrecked plane, ship, and bird on a pile of rocks with grass.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["b0c703df20154bbf9fd8707c61137fc5", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["plane, ship, bird", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white watercolor map of the state of ohio", "for a black and white image of an object on a white background"], "captions_pred_image": ["a black and white photograph of a pile of debris on the ground", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has more speakers", "label": 1}, {"captions": [" a white rock with green grass and moss on it.", " a small white house with a roof."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["color, grass, moss", "roof, color, white"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a 3d model of a building with a white roof"], "question": "which object has a white roof", "label": 1}, {"captions": ["white dragon with wings, .", " a house with a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["353a650264d64cf28e6e0a080f76ee28", "c8936ace72954650b4e2d84246964849"], "properties": ["wings, white, dragon", "roof, color, pink"], "captions_pred_pc": ["for a black and white silhouette of a dragon", "a black and white drawing of a toilet"], "captions_pred_image": ["a 3d model of a dragon flying in the sky royalty free 3d model preview no. 
1", "a 3d model of a house with a roof"], "question": "which entity has a roof that is pink", "label": 1}, {"captions": [" a small white building with a door, resembling a box-shaped house.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["1b5fe88d0ff149ae9d8b4eb455c5c90c", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["shape is box, color is white, door is present", "house, roof, wooden"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a white, open shelving unit", "a black and white photograph of a birdhouse"], "question": "which house has a wooden roof", "label": 1}, {"captions": [" a four-legged metal workbench with shelves.", " a rusted metal barrel with a yellow and red warning sign and stripe on it."], "sample_ids": ["e93b633d477942d9b79ef8ab566473d6", "5a49ad82ef7a4d33badea2261720f518"], "properties": ["Four legs, Metal, Shelf", "rusty, warning, metal"], "captions_pred_pc": ["for a black and white illustration of a cross", "a black and white drawing of dots on a white background a black and white drawing of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a table with four legs", "a black and white photograph of a barrel"], "question": "which object is made of metal", "label": 1}, {"captions": [" a small, snow-covered house.", " a black and white striped box."], "sample_ids": ["0d00d10b90134dbe9ce7b2b3d6669237", "00fa8accaaad44c780efe0c04ed4a12b"], "properties": ["house, snow, cover", "color, black, white"], "captions_pred_pc": ["in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration", "in 15 words or less a black and white pattern on a white background"], "captions_pred_image": ["a piece of broken glass on a white background", "a 3d image of a black 
and white striped surface"], "question": "which object is made of black and white?", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " a molecule featuring green, red, and blue spheres."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "1c0e821eb7c4489dbff9e20d7e8575a3"], "properties": ["bed, desk, window", "color, sphere, molecule"], "captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white photograph of a group of geometric shapes arranged in the shape of a diamond"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d model of a molecule in the shape of a pyramid"], "question": "which entity is a molecule?", "label": 1}, {"captions": [" a small building with a school, house, soccer field, and white ceiling light, featuring a white and green paper clip.", " a small wooden cabinet with a drawer, doubling as a bedside table."], "sample_ids": ["97f487941d26472294e005fa97c403be", "3755f3c19ae549c4bf708462db1b2581"], "properties": ["color, white, green", "Cabinet, drawer, wood"], "captions_pred_pc": ["for a black and white drawing of a computer keyboard", "a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d model of an office building", "a 3d model of a wooden box with a lid"], "question": "which object is made of wood", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", "pink light saber."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "c4c8ed95de4640fe8c0be1df68d816ba"], "properties": ["color, shape, and size", "color, pink, light"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "of a black dot on a white background"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a light saber on a gray background"], "question": "which object is pink?", "label": 1}, {"captions": [" a camera with a 
blue screen and lens.", " a house with wooden framing and trusses."], "sample_ids": ["904be230d5ac43a6a9604abc946febc7", "4501794e257c4a8ba60a94757d8e93a9"], "properties": ["screen, lens, color", "frame, trusses, wood"], "captions_pred_pc": ["a black and white drawing of a camera on a white background", "a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a digital camera royalty-free 3d model preview no 2", "a 3d model of a house under construction"], "question": "which entity is made of wood", "label": 1}, {"captions": [" of a human skull", " a small white house with a roof."], "sample_ids": ["3550b1feb37745b6a6dbec510ccb740b", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["a, b, c", "roof, color, white"], "captions_pred_pc": ["a hedgehog in the center of the image a hedgehog in the center of the image on a white background royalty free illustration", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of a human skull royalty-free 3d model preview no.1", "a 3d model of a building with a white roof"], "question": "which object has a roof", "label": 1}, {"captions": [" a small white house with a staircase and a window.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["9eb88d17310d42dda9e17883e9922525", "c3a82df41875402285608ef13a55df57"], "properties": ["house, staircase, window", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d rendering of a small room with a staircase", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a house with a yard, trees, bushes, and surrounding buildings.", " a house with a wooden-framed roof 
structure."], "sample_ids": ["7f8942ef51dd4246993a587a12df168c", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["house, yard, surrounding buildings", "roof, material, wood"], "captions_pred_pc": ["a black and white image of a truck on a white background", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a building with a roof"], "question": "which house has a wooden-framed roof structure", "label": 1}, {"captions": [" of a white rock-like object, possibly a shell or ice.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["096e42b466ec438d95c5d89a85191534", "c3a82df41875402285608ef13a55df57"], "properties": ["white, rock, shell", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["in one hundred words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a white rock on a gray background", "a white plastic object on a gray background"], "question": "which object is whiter", "label": 1}, {"captions": ["a featuring a tree stump, mossy wood, leaves, and a rock with grass.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["2527cd763a1a43f9870eb65e44e79f7d", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["mossy, rock, 
grass", "hat, skull, bread"], "captions_pred_pc": ["a black and white image of a person on a skateboard", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d model of a piece of wood on a white background", "a black and white image of a stone sculpture"], "question": "which entity has a skull with a blue hat?", "label": 1}, {"captions": [" a pink-framed building structure with beams and trusses.", " a small, snow-covered house."], "sample_ids": ["18e392c5360146eda498c5edab25b15c", "0d00d10b90134dbe9ce7b2b3d6669237"], "properties": ["frame, beams, trusses", "house, snow, cover"], "captions_pred_pc": ["a black and white drawing of a metal grate", "in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building under construction", "a piece of broken glass on a white background"], "question": "which entity is covered in snow", "label": 1}, {"captions": [" of a wooden staircase with marble floor and wooden railings in a house.", " of an egyptian sarcophagus."], "sample_ids": ["ee70964ce5e841bd87381cff40d59b88", "70aa484af2ab44149a608dd81a6ff459"], "properties": ["floor, staircase, railings", "sarcophagus, material, wood"], "captions_pred_pc": ["a black and white drawing of a light switch", "a black and white circular pattern on a white background"], "captions_pred_image": ["a 3d model of a staircase on a marble surface", "a black and white photograph of a sphere with egyptian hieroglyphics on it"], "question": "which object is made of wood", "label": 1}, {"captions": ["a featuring a boat, table, chairs, umbrella, and solar panel.", "s of a boat, bird, paper airplane, and kite flying in the air."], "sample_ids": ["0f0eb3a198d341d28f809b6d7634be8a", "795cfa41d48a4cfc893ff1981318594d"], "properties": ["boat, table, chairs, umbrella, solar panel", "s, boat, bird, airplane, kite"], "captions_pred_pc": ["a black and white 
illustration of a boat with an umbrella", "above a 3d illustration of a boy standing with his arms outstretched"], "captions_pred_image": ["a 3d model of a boat, a table, chairs, and an umbrella", "a white kite flying in the air against a gray background"], "question": "which entity has a boat", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", " a house with a roof and beams."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["frame, roof, trusses", "roof, beams, house"], "captions_pred_pc": ["a black and white drawing of a metal grate", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a building with a roof"], "question": "which house has a roof with beams", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", "a featuring a chair, a building, a glass tower, and a throne made of money, with various unique elements such as a green roof and a computer chip."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "18d2e75f23474d7489a6d7d605dfc76d"], "properties": ["apse, roof, floor plan", "throne, chair, building"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white illustration of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a building on top of a table"], "question": "which entity has a throne made of money", "label": 1}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " a white castle composed of small cubes."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["roof, color, blue", "composed of, white, cubes"], "captions_pred_pc": ["of a white lace clutch purse on a white background", "a black and white 
illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a building with many windows", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of small cubes", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", "a wooden-handled axe ."], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "439ad3cdddbc4211b8a3c98448e900cc"], "properties": ["resembles, toy, bookshelf", "handle material is wood, head material is metal, overall length is long"], "captions_pred_pc": ["a black and white image of a book cover", "a black and white image of a hammer on a white background"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe 3d model of an axe"], "question": "which object is made of wood", "label": 1}, {"captions": [" a brick wall with grass.", "a 3d white cube with windows resembling a building."], "sample_ids": ["53f2d948091f417cb580e22469c94db2", "4a07a5293f024bb0a353954a056ef626"], "properties": ["brick, grass, wall", "- material is white- color is white- texture is textured"], "captions_pred_pc": ["above a black and white illustration of an underwater scene", "a black and white image of a square made up of small dots"], "captions_pred_image": ["a black and white photo of a brick wall and a puddle", "a 3d model of a cube"], 
"question": "which entity is not a building?", "label": 0}, {"captions": ["a pixelated-textured purple sphere .", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["fb68393941804e769d5c9b372864a642", "c3a82df41875402285608ef13a55df57"], "properties": ["texture, color, shape", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white square made of dots on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a gray sphere on a white background", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" of a wooden cabinet with drawers and filing features.", " a white plastic box/tray with a hole in the middle."], "sample_ids": ["abbc90bbd5474f73b16482ccd10e07ec", "04f8bfad8ad14795aced8a83ea30ca60"], "properties": ["Cabinet, Drawers, Filing", "color is white, material is plastic, shape is box"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white image of a rectangular tray on a white background"], "captions_pred_image": ["a 3d rendering of a white wooden chest of drawers", "a 3d model of a white plastic tray"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it.", " a rusted metal barrel with a yellow and red warning sign and stripe on it."], "sample_ids": ["9bcb7cc44b444326bc426cd9e2aacf60", "5a49ad82ef7a4d33badea2261720f518"], "properties": ["- material is plastic- color is green- shape is box", "rusty, warning, metal"], "captions_pred_pc": ["a black and white illustration of a toilet brush and toilet brush holder", "a black and white drawing of dots on a white background a black and white drawing of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering 
of a plastic box with several compartments", "a black and white photograph of a barrel"], "question": "which object is made of metal", "label": 1}, {"captions": [" a small village featuring houses, trees, and a winding road.", " of a person breaking through a brick wall."], "sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "0708be2186d24d52815e8ac9d751fc37"], "properties": ["houses, trees, road", "image, brick, wall"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "for a black and white illustration of an ice cream cone"], "captions_pred_image": ["a black and white photograph of a small town", "a 3d model of a person breaking through a brick wall"], "question": "which image shows a person breaking through a wall?", "label": 1}, {"captions": [" a bridge structure with a pier, stairway, railings, and red and green elements.", " a small wooden cabinet with a drawer, doubling as a bedside table."], "sample_ids": ["23701a9674204ea881fb8af9914b1924", "3755f3c19ae549c4bf708462db1b2581"], "properties": ["color, pier, railings", "Cabinet, drawer, wood"], "captions_pred_pc": ["a black and white illustration of a computer keyboard", "a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d model of a staircase in the middle of a field", "a 3d model of a wooden box with a lid"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a 3d wooden pole with a metal spear handle.", " a blue plastic stool."], "sample_ids": ["1d5cf234576e41f0ba209c5e19d2db47", "2ea7d871e2f64c7daa6977a8d268b45e"], "properties": ["- material is wood, metal, metal", "color, plastic, blue"], "captions_pred_pc": ["of a black and white illustration of a vase on a pedestal", "a black and white drawing of a chair with dots"], "captions_pred_image": ["a black and white image of an old-fashioned pitchfork", "a 3d model of a white chair"], "question": "which object is made of plastic", "label": 1}, {"captions": [" 
a wooden desk with drawers and a green top.", "a white glass beer mug."], "sample_ids": ["ae41fa78e18748ab89571113754ea59a", "1d686cbd3e9a4c629a43088658989286"], "properties": ["top, color, wood", "color, white, glass"], "captions_pred_pc": ["a black and white drawing of a fireplace", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a 3d model of a desk royalty free 3d model preview no 3", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": [" of a destroyed building with a watercolor painting of a dilapidated house.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["5a33f024faf145ac80cdadcdfef8a797", "6b745457e06840119058883b35f78f58"], "properties": ["image, building, painting", "roof, color, blue"], "captions_pred_pc": ["above a black and white drawing of a building", "a black and white image of a building with dots"], "captions_pred_image": ["a black and white photograph of a damaged house", "a 3d model of a house with a steeple on top"], "question": "which building has a blue roof", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", "a white statue of a woman holding a baby."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "7cf1c30a98fc4ae796e040c2ce18c03a"], "properties": ["color, shape, and size", "color, white, statue"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "a black and white drawing of a heart"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d printed figurine of a woman holding a cat"], "question": "which entity is a statue?", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " a small house with stairs and a roof."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "e9305c80010f4e3b9de9789f01a9bee5"], "properties": ["a box, a cup, a 
bottle, a jar", "roof, stairs, house"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "above a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d rendering of a podium on a wooden floor"], "question": "which entity has a roof", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["room, bed, desk", "hat, skull, bread"], "captions_pred_pc": ["a black and white drawing of a door", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a black and white image of a stone sculpture"], "question": "which entity has a skull?", "label": 1}, {"captions": [" a building with wooden and steel structures, featuring stairs, railings, and a ceiling with numerous pipes.", " a small white table with stairs and a ladder, featuring a black and white kitchen hood and a black square ceiling light."], "sample_ids": ["26c47880756b4876b4f263373c3c5303", "5565c16f297e405f9d5dbf0ebb623605"], "properties": ["building, material, steel", "table, stairs, ladder"], "captions_pred_pc": ["a black and white drawing of a floor plan", "above a black and white photograph of a small square in the center of the image"], "captions_pred_image": ["a 3d model of a large structure with multiple levels", "a 3d model of a table with a stool on top"], "question": "which entity has a ladder?", "label": 1}, {"captions": [" a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["f6c6e7a65a3e42dfa431b1d984c72f28", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": 
["house, fence, dog", "a, material, clay"], "captions_pred_pc": ["above a black and white drawing of a bathroom", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" a pyramid with blue and pink lines, wires, and mesh.", "a white of a man in a suit and mask, possibly a diving suit."], "sample_ids": ["a3b2db8d5c6044f88b275839d0cd71bd", "205251e4277e41d1aae6b2358267ad56"], "properties": ["color, shape, material", "image, color, white"], "captions_pred_pc": ["a black and white image of a patterned rug", "a black and white image of a beetle on a white background"], "captions_pred_image": ["a 3d model of the pyramid roof royalty-free 3d model preview no.1", "a 3d printed figurine of an alien creature"], "question": "which entity is a white image?", "label": 1}, {"captions": [" a small, rocky island with diverse terrain and scattered rocks.", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], 
"sample_ids": ["09f2cf267e954c958828325067bcc36a", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["island, terrain, rocks", "building, plane, room"], "captions_pred_pc": ["above a black and white photo of a small island in the middle of a body of water", "a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a piece of dirt on the ground", "a 3d model of a box with a lot of items inside"], "question": "which entity has a room?", "label": 1}, {"captions": [" a red and white robot.", "a 3d printed model of white and red robots on a red platform."], "sample_ids": ["75e058152d8e438a9c6f8abf112e8d89", "0fa560c2fd9f4422a97356c947ac3b46"], "properties": ["color, red, white", "color, red, white"], "captions_pred_pc": ["a black and white drawing of a monkey wearing a hat", "a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a robot standing in a neutral pose royalty free 3d model preview no 3", "a 3d model of a robot standing on top of a table"], "question": "which robot is white?", "label": 0}, {"captions": [" of a rock formation with a white cliff and a rock.", " a brick wall with grass."], "sample_ids": ["4a25f6dfbea943bca137dacd2f7b984f", "53f2d948091f417cb580e22469c94db2"], "properties": ["image is rock formation with a white cliff and a rock", "brick, grass, wall"], "captions_pred_pc": ["above a black and white map of spain on a white background", "above a black and white illustration of an underwater scene"], "captions_pred_image": ["a black and white image of a rock formation on a gray background", "a black and white photo of a brick wall and a puddle"], "question": "which entity is a wall?", "label": 1}, {"captions": ["white 3d tank model", " a clear glass table with metal legs and balls on top."], "sample_ids": 
["ac17fa1c967e4af382de4a445049f231", "7c2bfa826f274377ac21f48d510848c3"], "properties": ["color is white, model is 3d, name is tank", "glass, metal, balls"], "captions_pred_pc": ["a black and white photo of a small object on a white background", "a black and white image of a wine glass"], "captions_pred_image": ["a 3d model of a tank on a white background royalty free 3d model preview no 2", "a clear acrylic foosball table"], "question": "which object is made of glass", "label": 1}, {"captions": [" a house featuring a roof with truss system, framing, insulation, and a ceiling light.", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["39876e69e3914d99a07e0dc59611c5c0", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["roof, truss system, framing", "box, paper clip, lock"], "captions_pred_pc": ["a black and white drawing of a window with dots all over it", "above a black and white image of a clock in the shape of a spiral"], "captions_pred_image": ["a 3d model of the roof of a house", "a 3d model of a stapler with a staple in it royalty free 3d model no."], "question": "which entity has a lock?", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["resembles, octopus, squid, spider, robot", "island, terrain, water"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" the white text \"ekberkaslan\" with a row of white cubes and a numbered box.", "a 3d white 
cube with windows resembling a building."], "sample_ids": ["87ee30d475f34b799c24bf7ef3a7b540", "4a07a5293f024bb0a353954a056ef626"], "properties": ["- color is white- shape is cubes- number is 1", "- material is white- color is white- texture is textured"], "captions_pred_pc": ["a close up of a black and white striped scarf", "a black and white image of a square made up of small dots"], "captions_pred_image": ["a 3d image of the word ebercaskalan on a white background", "a 3d model of a cube"], "question": "which object is whiter", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " a large metal building with a roof and truss structure."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "b85a99699ccd4bcba213322113bb253d"], "properties": ["- color is red, blue, pink", "roof, truss, structure"], "captions_pred_pc": ["for a black and white image of an object on a white background", "of a metal grate on a white background"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": ["a 3d-rendered large stainless steel tank on wheels.", " a house with a flat roof structure."], "sample_ids": ["160a243bf19741c4bd1c0ca9ab166851", "abc52d210d71415296730bb00352ce6f"], "properties": ["size, material, wheels", "roof, flat, structure"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a microscopic virus on a white background royalty free stock illustration", "a black and white drawing of a window with dots around it"], "captions_pred_image": ["a 3d rendering of a stainless steel tank on wheels", "a 3d model of a house with a roof"], "question": "which structure has a flat roof", "label": 1}, {"captions": [" a cityscape featuring various buildings, trees, an airport runway, and a plane.", " a small 
island featuring a large building, trees, and a house on a hill, surrounded by a forest."], "sample_ids": ["9b45036d3f0342a78db9a25938dc77fc", "d557c62e9be741a6b0f6b204d11a9c6f"], "properties": ["building, tree, plane", "house, hill, forest"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake in the shape of a snowflake royalty free illustration", "above a black and white illustration of a small island in the middle of a body of water"], "captions_pred_image": ["a black and white image of an industrial area with buildings and trucks in the foreground", "a black and white image of a small island in the middle of a body of water"], "question": "which entity has a house on a hill?", "label": 1}, {"captions": [" a floating small island with trees, grass, and a mountain.", " a small house with a staircase, balcony, and wooden floor."], "sample_ids": ["95e0d4e2464b433dbb6c4d1d30e8150f", "e67e211004cb450cbaf8139dd74ba39b"], "properties": ["island, mountain, grass", "floor, staircase, balcony"], "captions_pred_pc": ["a black and white map of the island of malta", "a black and white drawing of a wallet"], "captions_pred_image": ["a 3d image of a small island in the middle of a lake", "a 3d model of a bench on a wooden floor"], "question": "which entity has a wooden floor", "label": 1}, {"captions": [" a tree with a blue ball and a mushroom.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["4bb8528bd042471f8865cce122a03924", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["a, ball, mushroom", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black and white illustration of an airplane flying over a tree", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d model of a tree with a ball on top of it royalty free 3d model preview no.2", "a 3d rendering of a plastic box 
with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a diverse town featuring houses, buildings, people, animals, and desert elements.", " a white castle composed of small cubes."], "sample_ids": ["436d6492fa06466680ecc82e5e07a7a0", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["house, building, people", "composed of, white, cubes"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a small town in the middle of a field", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of small cubes", "label": 1}, {"captions": [" a small white building with a door, resembling a box-shaped house.", " a small wooden house with a green roof."], "sample_ids": ["1b5fe88d0ff149ae9d8b4eb455c5c90c", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["shape is box, color is white, door is present", "roof, color, green"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a white, open shelving unit", "a 3d model of a house with a ladder"], "question": "which house has a green roof", "label": 1}, {"captions": [" of a pile of metal, torn and shredded paper, rocks, and a decayed animal.", " of a black gun/rifle"], "sample_ids": ["c117f1923cad4ecf9df61b6e3d633374", "b596b52b86914c2b9bc4b4cd096621db"], "properties": ["a pile of metal, torn and shredded paper, rocks, and a decayed animal", "color is black, material is metal, type is gun/rifle"], "captions_pred_pc": ["a black and white map of germany on a white background", "a black and white image of a gun on a white background"], "captions_pred_image": ["a 3d model of a crumpled piece of paper on a white background", "a 3d model of a black gun on a gray background"], "question": "which 
object is made of metal", "label": 1}, {"captions": [" a hand featuring red, green, and blue stripes.", " a destroyed car with rusted, broken metal and torn paper."], "sample_ids": ["6ea21f8f91b04144b43aa4b606339de3", "3fe31c3bf5cd4574a8ca02222411a988"], "properties": ["color, red, green, blue", "metal, rusted, paper"], "captions_pred_pc": ["above a black and white image of a glove", "a black and white drawing of a person sitting in a chair"], "captions_pred_image": ["a 3d model of a white and gray glove", "a black and white image of a piece of debris on the ground"], "question": "which entity is more likely to be rusted", "label": 1}, {"captions": [" a wooden object, including a board, piece of wood, box, and shelf.", " of a white shelf with four legs and hooks, accompanied by a ceiling light fixture."], "sample_ids": ["c986212445a1466ca7be7b5ac6bea729", "5017581716c7402581a82ebf08d427a3"], "properties": ["wood, board, shelf", "Shelf, color, white"], "captions_pred_pc": ["a black and white drawing of snowflakes on a white background", "a black and white doormat on a white background stock illustration \u00a9 2018 iStock"], "captions_pred_image": ["a 3d rendering of a piece of marble", "a 3d rendering of a white shelf"], "question": "which shelf is white", "label": 1}, {"captions": ["a 3d object featuring a rock, shell, piece of paper, and cat.", "a 3d white cube featuring various text and logos, including \"gypsy stribes,\" \"guinea pig studios,\" \"guillaume pi sodds,\" \"happy studios,\" and \"guillem pie sos.\""], "sample_ids": ["53efab50e5a74e5ea165c763cea15be4", "5d08c34bfb2c4c9b9538e24d68761331"], "properties": ["a, rock, paper", "- material is plastic- color is white- shape is cube"], "captions_pred_pc": ["for a flock of birds in the sky", "of a black and white photo of a person sitting on a bench"], "captions_pred_image": ["a black and white image of a piece of paper in the shape of an island", "a 3d image of a cube with the word 'stories' written on it"], 
"question": "which object is made of plastic", "label": 1}, {"captions": [" a small house with a tree, pool, and pond in a green and blue landscape.", " a small island featuring a large building, trees, and a house on a hill, surrounded by a forest."], "sample_ids": ["e22489ba182f45cb81f0a83f22abe9bd", "d557c62e9be741a6b0f6b204d11a9c6f"], "properties": ["house, tree, pool", "house, hill, forest"], "captions_pred_pc": ["in 15 words or less a black and white image of a square with dots around it", "above a black and white illustration of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d model of a house and a tree in a box royalty free 3d model preview no.3", "a black and white image of a small island in the middle of a body of water"], "question": "which house is on a hill?", "label": 1}, {"captions": [" a white rock formation with pebbles and ice elements.", " a large house with a roof on a platform."], "sample_ids": ["94d94f5a75de4bd0a43b08609630876e", "cb3e09a301b746918a682a595037c7f7"], "properties": ["color, shape, texture", "roof, platform, house"], "captions_pred_pc": ["a black and white drawing of a pair of shoes on a white background royalty free illustration", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a small rocky island in the middle of a body of water", "a 3d model of a small house"], "question": "which entity is a building?", "label": 1}, {"captions": ["a white 3d object featuring black and white patterns, resembling a combination of a dish, smoke detector, cake, bowl, alarm clock, and ceiling light.", " of a mannequin head wearing a leather plague mask with straps."], "sample_ids": ["6a8cc820f00a4cfc954d56e2b1f6206a", "7821b30c2f8545ccac3e8b8a305d5082"], "properties": ["- material is plastic- color is white- shape is cylinder", "mannequin head, plague mask, straps"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a plate", "above a black and white image 
of a person's face with dots"], "captions_pred_image": ["a cake with a black and white design on the top of the cake", "a plague doctor's mask on a mannequin head"], "question": "which object is made of leather", "label": 1}, {"captions": ["a featuring a teddy bear on a rainbow, accompanied by a baby, a cat, a dog, and a drink, with a pink and green sign displaying \"close the sky over dreams.\"", " a potted christmas pine tree."], "sample_ids": ["80dfbe37b3d74f11b712ca1ad6570f70", "460c8f3034a844159826fac3b8aa35a5"], "properties": ["image, color, pink", "a, color, green"], "captions_pred_pc": ["above a black and white photograph of a dog in a bowl", "a black and white illustration of a snowflake on a white background"], "captions_pred_image": ["a 3d sculpture of an animal on a piece of paper", "a 3d model of a christmas tree in a vase"], "question": "which image is green?", "label": 1}, {"captions": ["a 3d collection featuring a cash register, destroyed car, pos machine with credit card machine, broken cell phone, black and blue phone, atm machine, crocodile's head, and broken roof.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["d9681d1f6fad42ab8d498cba24339ca8", "6b745457e06840119058883b35f78f58"], "properties": ["pos machine, credit card machine, cash register", "roof, color, blue"], "captions_pred_pc": ["a black and white illustration of a glass bottle", "a black and white image of a building with dots"], "captions_pred_image": ["a vintage cash register sitting on top of a table", "a 3d model of a house with a steeple on top"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" of a wooden windmill with a red roof.", " a house with a pink roof, truss, and a square white ceiling lamp."], "sample_ids": ["2ad8fca30285483d8b7f602fa078215d", "91b2e9e4660946f5b4808a18b5323b69"], "properties": ["roof, color, red", "roof, truss, lamp"], "captions_pred_pc": ["a white and black image of a snowflake in the shape of a 
snowflake", "a black and white pattern of dots on a white background"], "captions_pred_image": ["a 3d model of a windmill on a gray background", "a 3d model of a house with a metal roof"], "question": "which roof is pink", "label": 1}, {"captions": [" a city featuring various buildings, including one with a white roof and numerous white cubes, as well as a plane.", " of a wooden building frame with truss and roof structure."], "sample_ids": ["a3c50635c2a04e548e57d4f027899131", "1313f8185cf24f3bbd73ff4e4ddfab3e"], "properties": ["building, roof, white, cubes", "frame, truss, roof"], "captions_pred_pc": ["above a black and white image of a map", "a black and white image of a ladder on a white background"], "captions_pred_image": ["a 3d model of a city on a white background", "a 3d rendering of a bridge over a road"], "question": "which entity has a roof", "label": 1}, {"captions": [" a molecule featuring green, red, and blue spheres.", " of a large axe with a cross on top."], "sample_ids": ["1c0e821eb7c4489dbff9e20d7e8575a3", "4b6734945e204158b076a429a30ce2e9"], "properties": ["color, sphere, molecule", "axe, cross, top"], "captions_pred_pc": ["a black and white photograph of a group of geometric shapes arranged in the shape of a diamond", "for an axe on a white background"], "captions_pred_image": ["a 3d model of a molecule in the shape of a pyramid", "a black and white image of an axe on a gray background"], "question": "which object has a cross on top?", "label": 1}, {"captions": ["a 3d white cube with windows resembling a building.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["4a07a5293f024bb0a353954a056ef626", "c3a82df41875402285608ef13a55df57"], "properties": ["- material is white- color is white- texture is textured", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of a square made up of small dots", "a black and white drawing of a bird's wing"], 
"captions_pred_image": ["a 3d model of a cube", "a white plastic object on a gray background"], "question": "which object is whiter", "label": 1}, {"captions": [" a shattered piece of paper, resembling a broken phone and a flying newspaper.", " a large rock structure with a cave and small hole, resembling a stone sculpture and featuring a white plastic bag."], "sample_ids": ["80d02e2b6ceb4a3a81b6b67d2d98bc0a", "120bf1525e8649d9bdf3a593fe8f5ddc"], "properties": ["shattered, resembles, broken phone", "resembles, sculpture, rock"], "captions_pred_pc": ["for black ink brush strokes on a white background", "a black and white illustration of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a snowboarder in mid-air over a snowy landscape", "a 3d model of a bag with a zipper on it"], "question": "which entity is a rock?", "label": 1}, {"captions": [" of a stage featuring a black dj booth with purple lighting and a kitchen hood.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["a163994b0faa40d2a12097ef1d9e8642", "a17477b445b3443189dad22f768b888b"], "properties": ["color, lighting, booth", "roof, pillar, stairs"], "captions_pred_pc": ["in 15 words or less a black and white image of a piece of paper with dots on it", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a stage with a podium and a microphone stand royalty free 3d model preview no.2", "a 3d model of a small building with a balcony"], "question": "which entity has a roof?", "label": 1}, {"captions": [" a city with buildings, houses, trees, and grass.", "a white of a spaceship and building."], "sample_ids": ["bc649e19956041cf89c1572f1a33cff1", "bf7d4277c9184d35abdec85bd5e25956"], "properties": ["buildings, houses, grass", "image, building, spaceship"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration", "a black and white drawing of a tree"], 
"captions_pred_image": ["an image of a pile of trash on the ground", "a 3d model of a white object on a gray background"], "question": "which image shows a spaceship and building?", "label": 1}, {"captions": [" a black and gold pistol.", " a black and white striped box."], "sample_ids": ["402b57a5e32540b4938db1a9d3976220", "00fa8accaaad44c780efe0c04ed4a12b"], "properties": ["color, black, gold", "color, black, white"], "captions_pred_pc": ["a black and white image of a toothbrush", "in 15 words or less a black and white pattern on a white background"], "captions_pred_image": ["a 3d model of a gun royalty-free 3d model preview no.", "a 3d image of a black and white striped surface"], "question": "which object is made of black and white?", "label": 1}, {"captions": [" a car dealership interior featuring a showroom, repair shop, and various elements like a booth, bed, and ceiling light.", "a featuring a small room with a table, chair, laptop, and a white box containing a teddy bear and a piece of paper."], "sample_ids": ["3e22efacf9ee40a1a6b2e4b72a7314d2", "24f7d0a06d494c26a1678d81b2b7b093"], "properties": ["a, booth, bed", "a, laptop, teddy bear"], "captions_pred_pc": ["a black and white drawing of a tv screen with dots all over it royalty free illustration", "a black and white drawing of an umbrella on a white background"], "captions_pred_image": ["a 3d rendering of a room with a black and white color scheme", "a 3d rendering of a white room with various objects"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", " of a small white building with stairs and a lid."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["color, red, blue, structure", "building, stairs, lid"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "in 15 words or less a black and white drawing of a leopard on a white 
background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d model of a white box on a gray background"], "question": "which entity has a lid?", "label": 1}, {"captions": [" a small white bookcase-like building with stairs and a light fixture.", " a small building with stairs and a glass floor, featuring a square table and a black square ceiling light."], "sample_ids": ["5f99eb9d1f1e4d57b5690446f832c841", "8aaad713b8834739b008ccf2f3d86cce"], "properties": ["building, color, white", "floor, table, light"], "captions_pred_pc": ["in 15 words or less a black and white image of the letter 'f' made up of dots", "above a black and white photograph of a window"], "captions_pred_image": ["a 3d model of a bookshelf on a white background", "a black and white 3d model of a staircase on a platform"], "question": "which building has a glass floor", "label": 1}, {"captions": ["a featuring a robot, a black and white box, a man with a suitcase, a door, a large white building, and a white shelf.", "s of a laptop, printer, building, and small bathroom, along with a blue-lit box, white and blue truck, and hp color inkjet cartridge."], "sample_ids": ["4be12bf79ead47a68cd67fc01a6e5c8c", "747110c073314ee39ef2f4a8d63222da"], "properties": ["a, door, suitcase", "s, laptop, printer"], "captions_pred_pc": ["a black and white drawing of a dog in a cage", "a black and white image of a pair of scissors"], "captions_pred_image": ["a 3d model of a robot standing in front of an open door", "a black and white image of an electronic device"], "question": "which entity has a door?", "label": 0}, {"captions": [" a wooden-framed house with roof trusses.", " of a small house featuring a flat, brown roof, table with two chairs and a stool, ceiling light, and a window."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "3a509431d96b43f8a7aebe2846f08b96"], "properties": ["frame, roof, trusses", "roof, brown, flat"], "captions_pred_pc": ["a black and 
white drawing of a metal grate", "a black and white drawing of a snowflake on a white background a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d rendering of a table and stool"], "question": "which house has a flat roof", "label": 1}, {"captions": [" of a japanese-style pagoda house in a pixelated village.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["42c4e6ca4a0c4b7b9a97d543b2442222", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["image is a japanese-style pagoda house in a pixelated village", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white drawing of a square on a white background stock illustration \u00a9 2019 iStock", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a japanese temple and pagoda", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a house featuring a roof, floor plan, heating system, and ceiling light fixture.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["269939560e08432f9e134309b9a1c587", "a2354f13774340d392fbf33564934aab"], "properties": ["floor plan, heating system, ceiling light fixture", "building, roof, yellow"], "captions_pred_pc": ["a black and white drawing of a house", "a black and white image of a cell phone"], "captions_pred_image": ["a 3d model of a building with a glass facade", "a 3d rendering of a machine with a conveyor belt"], "question": "which building has a roof", "label": 1}, {"captions": [" of white spheres resembling a molecule.", " a small island featuring a large building, trees, and a house on a hill, surrounded by a forest."], "sample_ids": ["9d2c94d03ca745948b8cb4e8cafddb1c", 
"d557c62e9be741a6b0f6b204d11a9c6f"], "properties": ["color, shape, number", "house, hill, forest"], "captions_pred_pc": ["of a black and white 3d model of a molecule on a white background a black and white 3d model of a molecule on a white background royalty free illustration", "above a black and white illustration of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d sculpture of a white ball on a gray background", "a black and white image of a small island in the middle of a body of water"], "question": "which entity has a house on a hill?", "label": 1}, {"captions": [" of a white building with a small house and a desk with a laptop.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["9244a2d3a9e94c8398ef991f1661bb58", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["a, desk, laptop", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black and white image of a piece of furniture", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d model of an office desk on a white background", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a white 3d printed mickey mouse dice with various numbers and symbols on it.", "a black and white of a knife/sword with a handle."], "sample_ids": ["e2645ac544844f3c981203134a99c30c", "c7692fa635e049bda0a2039fa5a784a4"], "properties": ["- material is plastic- shape is dice- color is white", "image, color, black and white"], "captions_pred_pc": ["a circle of dots with the number 2 in the center", "of a black and white knife on a white background"], "captions_pred_image": ["a 3d printed white dice with a mickey mouse face", "a black and white image of a knife"], "question": "which entity is not a black and white image?", "label": 1}, {"captions": [" a 
human foot with a red, white, and pink bone structure, including a skull with red and white details.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["39cdff793d3f4dcd898dd6b5222cb289", "6b745457e06840119058883b35f78f58"], "properties": ["color, shape, size", "roof, color, blue"], "captions_pred_pc": ["a black and white image of an ornate design on a white background", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a human bone structure", "a 3d model of a house with a steeple on top"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" a white dragon with red horns and eyes.", " a small wooden house with a green roof."], "sample_ids": ["b7efa835186c4084b4d6ca2479af78fb", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["color, horns, eyes", "roof, color, green"], "captions_pred_pc": ["above a black and white image of a sculpture made up of dots", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a goat's head with horns", "a 3d model of a house with a ladder"], "question": "which entity has a green roof", "label": 1}, {"captions": [" a house with a blue roof.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["8ff693cd3ca74f8a901ca259b8b3a7ac", "c3a82df41875402285608ef13a55df57"], "properties": ["roof, color, blue", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white drawing of a cross on a white background royalty free illustration", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a house with a roof", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a small white building with a floor plan and ceiling light.", " of a torn piece of paper, a hole in a wall, and a guitar with a sign on it, accompanied by a piece of metal, wood, and a 
large rock."], "sample_ids": ["1df55bb7035941cc9829aa904e2af065", "cc4ccf85d4c1425cb5975b8b5664d38a"], "properties": ["floor plan, ceiling light, color", "paper, hole, sign"], "captions_pred_pc": ["a line of dots on a white background a line of dots on a white background royalty free illustration", "a silhouette of a map of the state of karnataka, india on a white background royalty free illustration"], "captions_pred_image": ["a white 3d model of a house", "an image of a torn piece of paper in the shape of a map"], "question": "which entity has a sign on it", "label": 1}, {"captions": [" of a house with a roof.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["195ce38d57164eb588d19f8bd337f36e", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["roof, house, roof", "house, pool, balcony"], "captions_pred_pc": ["a black and white drawing of a toilet on a white background", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d model of a small house royalty free 3d model preview no 2", "a 3d model of a modern house"], "question": "which house has a pool", "label": 1}, {"captions": [" a small, snow-covered house.", " a small house on a hill in a field."], "sample_ids": ["0d00d10b90134dbe9ce7b2b3d6669237", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["house, snow, cover", "house, hill, field"], "captions_pred_pc": ["in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a piece of broken glass on a white background", "a black and white image of a small house"], "question": "which house is on a hill?", "label": 1}, {"captions": [" a city featuring various buildings, including one with a white roof and numerous white cubes, as well as a plane.", " a house featuring a roof with truss system, framing, insulation, and a ceiling light."], 
"sample_ids": ["a3c50635c2a04e548e57d4f027899131", "39876e69e3914d99a07e0dc59611c5c0"], "properties": ["building, roof, white, cubes", "roof, truss system, framing"], "captions_pred_pc": ["above a black and white image of a map", "a black and white drawing of a window with dots all over it"], "captions_pred_image": ["a 3d model of a city on a white background", "a 3d model of the roof of a house"], "question": "which roof is more complex", "label": 1}, {"captions": ["a featuring a kite, paper airplane, and birds flying alongside a red, white, and blue plane in the sky.", " a white and gold mirror on a wooden easel stand."], "sample_ids": ["f7bb7dcf3c774149809444d6c7d20ab8", "0d10d734448d4a5d8d07b938c12d9d80"], "properties": ["color, plane, sky", "color, white, gold"], "captions_pred_pc": ["a silhouette of a man and a child standing on the edge of a cliff silhouette of a man and a child standing on the edge of a cliff silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of a man and a child standing on the edge of a silhouette of", "for a black and white image of a shoe on a white background"], "captions_pred_image": ["a 3d sculpture of a bird in flight on a gray background", "a 3d model of a standing mirror on a white background"], "question": "which object is white 
and gold?", "label": 1}, {"captions": [" an ice cream machine, popsicle with two sticks, and a shelf with a computer monitor.", " of a small white building with stairs and a lid."], "sample_ids": ["cb840159fea7436d81eb33bdccad3596", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["A, a, a", "building, stairs, lid"], "captions_pred_pc": ["a black and white illustration of a bench", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white and gray wall mounted shelf", "a 3d model of a white box on a gray background"], "question": "which entity has a lid?", "label": 1}, {"captions": [" a wooden house with a roof and framing.", " a large house with a roof on a platform."], "sample_ids": ["4634a9bdf54549a99f68be77f1464b0a", "cb3e09a301b746918a682a595037c7f7"], "properties": ["roof, framing, material", "roof, platform, house"], "captions_pred_pc": ["a black and white drawing of an abstract pattern", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a barn structure", "a 3d model of a small house"], "question": "which house has a roof on a platform", "label": 1}, {"captions": [" a small blue bottle with a lid and a small blue box.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["c1bc7f87f15943ba88f0410ef9e387e8", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["blue, bottle, box", "lizard, rock, stuffed animal"], "captions_pred_pc": ["a black and white image of a sponge and a sponge holder", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a vase and a box royalty free 3d model preview no. 
3", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": [" of a wooden staircase with marble floor and wooden railings in a house.", " a small house on an island with a boat and a bird on a rock."], "sample_ids": ["ee70964ce5e841bd87381cff40d59b88", "1e56a92a0ddc41e59694bd1ad1656149"], "properties": ["floor, staircase, railings", "house, rock, bird"], "captions_pred_pc": ["a black and white drawing of a light switch", "a black and white drawing of a boat in the middle of a body of water"], "captions_pred_image": ["a 3d model of a staircase on a marble surface", "a 3d rendering of a house on a rock"], "question": "which house has a bird on a rock?", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "0d7e4d9471414a21b4a5b18a54f7ec22"], "properties": ["ceiling, light, desks", "ceiling, light, desks"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "a black and white drawing of a square on a white background"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d model of an office space with desks and chairs"], "question": "which entity has a ceiling with blue and white triangles and a ceiling light?", "label": 0}, {"captions": [" a small white house with stairs and a spiral staircase, featuring a white table and ceiling light.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], "sample_ids": ["e9e1cc7fae22458197a61f43a9c355f4", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["house, staircase, table", "building, room, sky"], 
"captions_pred_pc": ["above a black and white photograph of a dog in a frame", "above a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a 3d model of a small house with a spiral staircase", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a room?", "label": 1}, {"captions": ["a 3d printed green robot with two arms and two legs.", " a cartoon panda bear wearing a diaper."], "sample_ids": ["04e3e8ce541e487b9e342570fe1b4eb2", "e2c307d9fa2b4d40b4602537d7f71e24"], "properties": ["color is green, material is 3d printed, number of limbs is 2", "cartoon, bear, diaper"], "captions_pred_pc": ["a black and white image of a robot in the shape of a snowflake", "a 3d model of a teddy bear on a white background 3d model of a teddy bear on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a futuristic robot standing on its hind legs", "a 3d model of a cute panda bear"], "question": "which entity is a cartoon?", "label": 1}, {"captions": [" a small white building with stairs and shelves.", " of a small white building or house with a white ceiling and kitchen hood."], "sample_ids": ["9e1f64d4fd514059be934077717536dc", "17b23d23309d4385938ced3ca536a1d1"], "properties": ["building, stairs, shelves", "building, ceiling, kitchen"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "above a black and white drawing of a bathroom with a toilet and sink"], "captions_pred_image": ["a white 3d model of a building with stairs", "a 3d model of a white building on a gray background"], "question": "which building has a kitchen?", "label": 1}, {"captions": [" of a small house featuring a flat, brown roof, table with two chairs and a stool, ceiling light, and a window.", "s of a laptop, printer, building, and small bathroom, along with a blue-lit box, white and blue truck, and hp color inkjet cartridge."], "sample_ids": 
["3a509431d96b43f8a7aebe2846f08b96", "747110c073314ee39ef2f4a8d63222da"], "properties": ["roof, brown, flat", "s, laptop, printer"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a black and white drawing of a snowflake on a white background royalty free illustration", "a black and white image of a pair of scissors"], "captions_pred_image": ["a 3d rendering of a table and stool", "a black and white image of an electronic device"], "question": "which entity has a brown roof", "label": 0}, {"captions": [" a diverse town featuring houses, buildings, people, animals, and desert elements.", " a wooden staircase with a railing and table."], "sample_ids": ["436d6492fa06466680ecc82e5e07a7a0", "956247bea850458199c651037d4b1d7f"], "properties": ["house, building, people", "railing, table, staircase"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench", "above a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a small town in the middle of a field", "a 3d model of a table with a staircase"], "question": "which entity has a table?", "label": 1}, {"captions": ["a gray background featuring a white line in the middle.", " a house with a green, wooden-structured roof."], "sample_ids": ["47f89f92bef14b7193d0ffa3934f6977", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["color, line, gray", "roof, color, green"], "captions_pred_pc": ["above a black and white image of a piece of furniture", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["an airplane flying in the sky with the sun shining behind it", "a 3d model of a house with a triangular roof"], "question": "which entity has a roof that is green?", "label": 1}, {"captions": [" a small house with a roof and ceiling-mounted air conditioner.", "a white of a man with arms outstretched."], "sample_ids": ["6965067ea9e34357a7af21a2f078fbac", "84afafb3f8f04499bd77d4e7cbc40fb7"], 
"properties": ["roof, air conditioner, house", "image, color, white"], "captions_pred_pc": ["a black and white illustration of a window", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d rendering of a small house with a covered porch", "a 3d model of a man with his arms outstretched"], "question": "which image is white", "label": 1}, {"captions": ["a featuring a bench with flowers, a bridge, a white sculpture, a bicycle with a flower, a giraffe, and a palm tree.", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["12093c89a60941e7884b252bdc05104c", "09f2cf267e954c958828325067bcc36a"], "properties": ["giraffe, bench, flower", "island, terrain, rocks"], "captions_pred_pc": ["a black and white drawing of a gear on a white background", "above a black and white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d model of a sculpture made of sticks", "a black and white image of a piece of dirt on the ground"], "question": "which entity has more rocks", "label": 1}, {"captions": ["a featuring a skeleton, torn paper, long stick, rock, and broken wood.", " a small wooden house with a green roof."], "sample_ids": ["46903bf029934b1989bc062dcb0a5531", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["skeleton, torn, paper, long stick, rock, broken wood", "roof, color, green"], "captions_pred_pc": ["a close up of a black object on a white background", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d sculpture of a person's hand in the air royalty-free 3d model preview", "a 3d model of a house with a ladder"], "question": "which object is made of wood", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", " a rusty green metal box with a handle and a gun inside."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", 
"76cfd0e88ce243d483919a018a4f1a9e"], "properties": ["hat, sword, gun", "box, handle, gun"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "a black and white square with dots on a white background"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d rendering of a metal box with a handle"], "question": "which entity has a handle?", "label": 1}, {"captions": [" a small green chair with a slanted back and white base.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["7f93c12cbbc74e579d5f0430cfa0010f", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["color, white, base, slanted", "- material is wood, rusty, horned"], "captions_pred_pc": ["above a black and white drawing of a chair", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a chair", "3d model of a plague doctor's mask"], "question": "which object is made of wood", "label": 1}, {"captions": ["a featuring a room with purple and black walls, a table and chairs, a pink and black door, a purple and black cabinet, and a pink and black shelf.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["21639082215b4b179d574d2408124838", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["color, room, furniture", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["for a black and white image of an abstract design on a white background", "a black and white image of a cone shaped object"], "captions_pred_image": ["a black and white 3d rendering of a room with a table and chairs", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a snowman with a 
white umbrella and a white coat.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["bbfff7ef6ab14b30bc7b5a3aa8391f95", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["color, umbrella, coat", "island, mountain, grass"], "captions_pred_pc": ["a black and white image of a tree made of dots", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d model of a snowflake on a white background", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more grass", "label": 1}, {"captions": [" a blue character with arms outstretched.", "a pixelated-textured purple sphere ."], "sample_ids": ["e365358ce93a4ba2b1b9a48537b85477", "fb68393941804e769d5c9b372864a642"], "properties": ["color, shape, size", "texture, color, shape"], "captions_pred_pc": ["in 15 words or less a black and white image of a hexagonal ring", "a black and white square made of dots on a white background"], "captions_pred_image": ["a 3d model of a person standing with their back to the camera", "a 3d model of a gray sphere on a white background"], "question": "which entity is a sphere?", "label": 1}, {"captions": ["a featuring a phone booth, desk, chair, lamp, ladder, and two additional chairs.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["7da804ad2b554c9a9915d775afb015d3", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["desk, chair, lamp", "a, material, clay"], "captions_pred_pc": ["a black and white illustration of a city skyline", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d rendering of a desk and chair in a room", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" a house with a yard, trees, bushes, and surrounding buildings.", "a pixelated-textured purple sphere ."], "sample_ids": ["7f8942ef51dd4246993a587a12df168c", "fb68393941804e769d5c9b372864a642"], "properties": 
["house, yard, surrounding buildings", "texture, color, shape"], "captions_pred_pc": ["a black and white image of a truck on a white background", "a black and white square made of dots on a white background"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a gray sphere on a white background"], "question": "which entity is a sphere?", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", "\"multiple white cubes arranged in a row on a gray background.\""], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "17c8222d4ce04e518117078e7de6aaed"], "properties": ["color, door, window", "color, background, white"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "a black and white image of a box with the words 'box 2' and 'thoughtful'"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "an image of a white background with a few small cubes on it"], "question": "which object has a white door and windows?", "label": 0}, {"captions": [" of a white rock-like object, possibly a shell or ice.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["096e42b466ec438d95c5d89a85191534", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["white, rock, shell", "torso, breasts, pattern"], "captions_pred_pc": ["in one hundred words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 100 words or less 
100 words or less 100 words or less 100 words or less 100 words or less 100 words", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d model of a white rock on a gray background", "a 3d model of a woman's chest"], "question": "which entity is a torso?", "label": 1}, {"captions": [" a large, rusty, square-shaped metal pillar resembling a rusted tower.", "a 3d wooden toy castle with red and yellow towers, a red roof, and a man inside."], "sample_ids": ["b5afccae993346079483507296fbb029", "311f6655ed854899b07ea10f3613ef7a"], "properties": ["shape is square, material is metal, color is rusty", "a, color, red"], "captions_pred_pc": ["above a black and white image of a square frame with dots", "a black and white drawing of a wallet on a white background"], "captions_pred_image": ["a 3d model of a concrete column", "a 3d model of a castle with two towers"], "question": "which object is made of wood", "label": 1}, {"captions": ["a 3d object featuring a fish with a long nose and open mouth, a cat head with red, yellow, and green stripes, a frog with a long snout, a white horse with green and yellow stripes, and a wolf mask.", " featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot."], "sample_ids": ["276699bb0f974c47b4e2954cfcd1651c", "34ebe81ae93841ca829efd15aee4d8c1"], "properties": ["a, color, white", "moss, mushroom, grass"], "captions_pred_pc": ["a black and white image of a skull in the shape of a butterfly", "for a black and white illustration of a cloud on a white background"], "captions_pred_image": ["a 3d model of an animal with a long nose", "a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor 
in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor"], "question": "which entity has a mushroom in grass?", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", "a white of a man with arms outstretched."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["color, shape, and size", "image, color, white"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d model of a man with his arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": [" a pink-framed building structure with beams and trusses.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["18e392c5360146eda498c5edab25b15c", "7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["frame, beams, trusses", "water, boat, rock"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white illustration of a surfboard"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d image of an animal laying on the ground"], "question": "which entity has a boat?", "label": 1}, {"captions": [" a snowy small village with farm buildings and a fence.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["6bb669534ccc434f9ab4d7b39bae3510", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["building, fence, snowy", "door, lock, handle"], 
"captions_pred_pc": ["a black and white drawing of a boat on the water", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of a small village in the snow royalty free 3d model preview no. 3", "a black and white image of a door with a crack in it"], "question": "which door has a lock", "label": 1}, {"captions": [" a colorful building with red and blue blocks, a yellow roof, and hanging from the ceiling.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["2a31c42de6f74ddba6b19b3467066e11", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["color, roof, block", "roof, trusses, beams"], "captions_pred_pc": ["a black and white drawing of a room with a lot of dots", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d model of a roof structure"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" a castle on an island with a small floating house, trees, and clouds.", " a small island featuring a large building, trees, and a house on a hill, surrounded by a forest."], "sample_ids": ["c4c09479570943e2845fbd4c6a450568", "d557c62e9be741a6b0f6b204d11a9c6f"], "properties": ["castle, island, house", "house, hill, forest"], "captions_pred_pc": ["above a black and white illustration of a group of dots in the shape of a circle", "above a black and white illustration of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d model of a small house on an island", "a black and white image of a small island in the middle of a body of water"], "question": "which entity has a house on a hill?", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", " of a house with a roof."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "195ce38d57164eb588d19f8bd337f36e"], 
"properties": ["base material is wood, color is red, message is welcome to northwich", "roof, house, roof"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "a black and white drawing of a toilet on a white background"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a small house royalty free 3d model preview no 2"], "question": "which object has a roof", "label": 1}, {"captions": [" a tall glass tower featuring blue, green, and white squares.", "a featuring a train, large ship, long metal pipe, boat, black map of kenya, and a black piece of plastic."], "sample_ids": ["405d68eda26c401fbcddc7e3a457c74e", "49c5a9d42ba64fb2b681ef583d700b98"], "properties": ["color, shape, height", "a train, a ship, a boat"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "above a black and white image of a long, curved line on a white background"], "captions_pred_image": ["a black and white photograph of a metal sculpture on a pedestal", "a 3d model of a submarine"], "question": "which entity has a train", "label": 1}, {"captions": [" a large, black and white circular building, resembling a stadium or ring structure.", " a small wooden house with a green roof."], "sample_ids": ["67f46bb0048244c687a58d1017a08f6b", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["building, color, black and white", "roof, color, green"], "captions_pred_pc": ["the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background 
illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustr", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a circular fence with black and white stripes", "a 3d model of a house with a ladder"], "question": "which building is made of wood", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", " a broken human skull and stone bowl."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "f7f1d8f726cb4ec6bda791aae99a10c2"], "properties": ["- color is red, blue, pink", "skull, bowl, human"], "captions_pred_pc": ["for a black and white image of an object on a white background", "a black and white illustration of a toilet bowl with dots all over it"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a ceramic bowl with a large crack in the middle"], "question": "which object is made of stone", "label": 1}, {"captions": [" a large industrial building, including a factory and warehouse.", " of a wooden windmill with a red roof."], "sample_ids": ["e3d0e407049c43f39c3aaa74667f56b1", 
"2ad8fca30285483d8b7f602fa078215d"], "properties": ["building, type, factory", "roof, color, red"], "captions_pred_pc": ["a black and white image of a city skyline on a white background", "a white and black image of a snowflake in the shape of a snowflake"], "captions_pred_image": ["a 3d model of an aircraft carrier on a white background royalty free 3d model preview no.2", "a 3d model of a windmill on a gray background"], "question": "which building has a red roof", "label": 1}, {"captions": [" a wooden shed with a gray roof.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["8b32e1ded62144768cd9ca8945fa8524", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["roof, color, gray", "roof, color, yellow"], "captions_pred_pc": ["a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a shed with a gray roof", "a 3d model of a house with a roof"], "question": "which roof is yellow", "label": 1}, {"captions": [" a red circular object with a checkered pattern, resembling a round pillow or bed cover.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["9cf9fb6d07084488892422a5a5be00ef", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["pattern, color, shape", "color is white, yellow, plastic"], "captions_pred_pc": ["a black circle on a white background", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a 3d model of a round cushion royalty-free 3d model preview", "a white plastic container with a label on it"], "question": "which object 
is made of plastic", "label": 1}, {"captions": [" of an ancient stone bowl, wooden headpiece, and broken pottery pieces.", " of an ancient stone bowl, wooden headpiece, and broken pottery pieces."], "sample_ids": ["d48b6ff03d6744eb921c41a4a05ff55d", "d48b6ff03d6744eb921c41a4a05ff55d"], "properties": ["bowl, pottery, headpiece", "bowl, pottery, headpiece"], "captions_pred_pc": ["a black and white illustration of a circle made up of many small dots", "a black and white illustration of a circle made up of many small dots"], "captions_pred_image": ["a 3d model of a piece of ancient pottery", "a 3d model of a piece of ancient pottery"], "question": "which entity has a bowl", "label": 0}, {"captions": [" a house featuring a pitched roof structure with brick detailing.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["5fbd274f897b44fcafa02ee84228debf", "a2354f13774340d392fbf33564934aab"], "properties": ["structure, roof, pitch", "building, roof, yellow"], "captions_pred_pc": ["a black and white illustration of a square with a lot of dots on it", "a black and white image of a cell phone"], "captions_pred_image": ["a 3d model of the roof of a house", "a 3d rendering of a machine with a conveyor belt"], "question": "which building has a yellow roof", "label": 1}, {"captions": ["a featuring a lamp, harp, white bowl, and white curved wall.", " tall grass, plants, rocks, and a tree."], "sample_ids": ["55bcec23e1b34f0d9d748b4dcc3ea123", "eefed882ed5f4711bc5a76332d9712f3"], "properties": ["lamp, harp, bowl", "grass, plants, rocks"], "captions_pred_pc": ["a black and white illustration of a curved line", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a harp in a white room", "a 3d model of a group of trees"], "question": "which entity has more grass", "label": 1}, {"captions": [" a destroyed house and plane amidst a town with buildings.", "a low poly of a plant on a white object, 
resembling a paper or plastic bag."], "sample_ids": ["0fd3ddca09194b8f94ef731af3b64a08", "d49d8ed244094349a99e4faca05e0690"], "properties": ["house, plane, town", "low poly, plant, white"], "captions_pred_pc": ["above a black and white drawing of a piece of paper", "a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a damaged building", "a 3d model of a plant growing out of a rock"], "question": "which object is white", "label": 1}, {"captions": [" of a white spiral, earbuds, ceiling light, and silver ring with black and white scissors and design.", "star wars stormtrooper "], "sample_ids": ["c69f60b389124ad9b4f81c64ec332054", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["earbuds, light, ring", "a, color, white"], "captions_pred_pc": ["a black and white drawing of a needle and thread", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a black and white illustration of a pair of sunglasses and a pair of scissors next to each other on a white background", "a 3d model of a star wars stormtrooper"], "question": "which entity is not a white color?", "label": 0}, {"captions": [" a white rocket ship.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["22137b9fff744310ad3b4abe6d869718", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, shape, size", "a, material, clay"], "captions_pred_pc": ["above a black and white illustration of a planet", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a rocket ship royalty free 3d model preview no.1", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" of a pile of metal, torn and shredded paper, rocks, and a decayed animal.", " a concrete wall with peeling paint and rusted metal features."], 
"sample_ids": ["c117f1923cad4ecf9df61b6e3d633374", "4376831ff557462dbacc4cce88a8cc86"], "properties": ["a pile of metal, torn and shredded paper, rocks, and a decayed animal", "paint, rust, concrete"], "captions_pred_pc": ["a black and white map of germany on a white background", "above a black and white image of a shelf on a white background"], "captions_pred_image": ["a 3d model of a crumpled piece of paper on a white background", "a 3d model of a concrete wall"], "question": "which entity has more rust", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", " a group of ponies in a row."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "e2c00fdbc5bd40bba2c41b62520a58e9"], "properties": ["room, bed, desk", "group, row, pony"], "captions_pred_pc": ["a black and white drawing of a door", "a black and white drawing of an octopus on a white background"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "a 3d model of my little pony"], "question": "which entity is a group of ponies?", "label": 1}, {"captions": ["three white paper windmills and a city model with a nativity scene silhouette.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["fa5ee6165f31465d9d75d046818f4006", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["windmills, silhouette, city model", "house, table, chair"], "captions_pred_pc": ["a black and white photo of a pair of sunglasses", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of a cityscape on a white background", "a 3d rendering of a small white table with a chair"], "question": "which entity has a table and chair?", "label": 1}, {"captions": ["a featuring a small island with trees, mountains, a house on a hill, a large building, and a boat floating in the water.", " a small, modern house with a green roof, located on a hill, surrounded by trees, 
grass, and a pond."], "sample_ids": ["37bdbc633c9545878a98ff47c3029e32", "a452d5381dad4dc09f5ebe10635ae5fe"], "properties": ["a, island, water", "house, roof, green"], "captions_pred_pc": ["a black and white photo of a boat in the water", "above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white"], "captions_pred_image": ["a 3d model of a building with trees surrounding it", "a 3d model of a building with a black roof"], "question": "which house has a green roof", "label": 1}, {"captions": [" a house with a pink roof, truss, and a square white ceiling lamp.", " a small house on an island with a boat and a bird on a rock."], "sample_ids": ["91b2e9e4660946f5b4808a18b5323b69", "1e56a92a0ddc41e59694bd1ad1656149"], "properties": ["roof, truss, lamp", "house, rock, bird"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white drawing of a boat in the middle of a body of water"], 
"captions_pred_image": ["a 3d model of a house with a metal roof", "a 3d rendering of a house on a rock"], "question": "which house has a bird on a rock?", "label": 1}, {"captions": [" a stone wall featuring a statue and a window.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["09ab58e01e0d4393bc1e82f157e641be", "c3a82df41875402285608ef13a55df57"], "properties": ["a, window, statue", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of dots on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a black and white photo of a sculpture of jesus on the cross", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a large metal building with a roof and truss structure."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "b85a99699ccd4bcba213322113bb253d"], "properties": ["color, white, black, white", "roof, truss, structure"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "of a metal grate on a white background"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a white rock formation with pebbles and ice elements.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], "sample_ids": ["94d94f5a75de4bd0a43b08609630876e", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["color, shape, texture", "building, room, sky"], "captions_pred_pc": ["a black and white 
drawing of a pair of shoes on a white background royalty free illustration", "above a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a 3d model of a small rocky island in the middle of a body of water", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a building?", "label": 1}, {"captions": [" a modern office building with a green door, green roof, windows, and blue lights.", " a table with pillar-like yellow and white poles, featuring a wooden structure, large tent, glass roof, and a chandelier with numerous glass tubes."], "sample_ids": ["d6087023095446fbadef1721478373b2", "fa06167d83e54b05bdfbeeae2ca7c8a6"], "properties": ["door, roof, window", "table, structure, roof"], "captions_pred_pc": ["a black and white drawing of a toilet brush", "a black and white image of a map with dots"], "captions_pred_image": ["a 3d model of an apartment building", "a 3d model of a building with many pillars"], "question": "which entity has a roof made of glass?", "label": 1}, {"captions": ["a white 3d printed ring with a bow, snake design, and two arms.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["1fa054d12a084b7786d1185e0dc8787c", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, material, shape", "a, material, clay"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a virus on a white background stock illustration", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d printed ring with a unique design", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" a white circular light with a yellow dot in the middle.", "\"multiple white cubes arranged in a row on a gray background.\""], "sample_ids": ["fa753fe490854b35be3b76450d2641e0", "17c8222d4ce04e518117078e7de6aaed"], "properties": ["color, shape, size", "color, background, white"], 
"captions_pred_pc": ["above an illustration of a black circle with a white background", "a black and white image of a box with the words 'box 2' and 'thoughtful'"], "captions_pred_image": ["a white circular object on a gray background", "an image of a white background with a few small cubes on it"], "question": "which object is white", "label": 0}, {"captions": [" a row of houses featuring roof structures with green roofs and tiled roof slats.", " a house with a roof and beams."], "sample_ids": ["aef9b23a78a7450286a961cc13448d00", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["roof, green, tiled", "roof, beams, house"], "captions_pred_pc": ["of a black and white photo of a decorative metal wall hanging", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a set of stainless steel shelves", "a 3d model of a building with a roof"], "question": "which house has a roof with beams", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "09f2cf267e954c958828325067bcc36a"], "properties": ["roof, green, lawn", "island, terrain, rocks"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "above a black and white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a black and white image of a piece of dirt on the ground"], "question": "which entity has a diverse terrain", "label": 1}, {"captions": [" a small house with a blue roof, a door, and a pool.", "a white of a woman with her arms outstretched."], "sample_ids": ["40c52c2d278345c5b4e8d00a991271dc", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["door, roof, pool", "image, color, white"], "captions_pred_pc": ["of a black and white 
photo of a window with fringes", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a woman with her arms outstretched"], "question": "which image is white", "label": 1}, {"captions": ["a featuring a rock with a hole, a piece of metal, a knife, an arrow, and a person near a sand pit.", "a low poly of a plant on a white object, resembling a paper or plastic bag."], "sample_ids": ["b57936676e9d43abb635fa1217992287", "d49d8ed244094349a99e4faca05e0690"], "properties": ["a, hole, rock", "low poly, plant, white"], "captions_pred_pc": ["a black and white image of a lace belt", "a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon's surface 3d model of the moon'", "a 3d model of a plant growing out of a rock"], "question": "which object is white", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": 
["9863dee114264b89a95dca4c78d08424", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["hat, sword, gun", "torso, breasts, pattern"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d model of a woman's chest"], "question": "which entity has a teddy bear?", "label": 1}, {"captions": [" of a loaf of bread and a piece of chocolate cake.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["3a6cda16adee41ebbe3cbb8c6cdbf464", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["bread, chocolate, cake", "throne, stairs, tree"], "captions_pred_pc": ["a black and white image of a sponge on a white background sponge on a white background royalty free illustration", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a loaf of bread", "a 3d model of a throne with a tree on it"], "question": "which entity has more stairs", "label": 1}, {"captions": ["s of a plane, house, pile of rocks, and islands, accompanied by watercolor paintings of a tree, rocks, and a boat in a sand dune.", " a group of ponies in a row."], "sample_ids": ["8da8da6ccf5f4011a4115977c55d1cb8", "e2c00fdbc5bd40bba2c41b62520a58e9"], "properties": ["s, plane, house", "group, row, pony"], "captions_pred_pc": ["a black and white silhouette of a map", "a black and white drawing of an octopus on a white background"], "captions_pred_image": ["a black and white image of a small island in the middle of a body of water", "a 3d model of my little pony"], "question": "which entity has a group of ponies in a row?", "label": 1}, {"captions": [" a white 
plastic ring, resembling a car fender or mouth guard.", "a wooden tower made of stacked blocks with holes in them, resembling a toy castle."], "sample_ids": ["f76460ad2a1a4fffb370d4556c405c6e", "da8b5d21da9b4037982f29383d60b100"], "properties": ["size, material, color", "resembles, toy, castle"], "captions_pred_pc": ["of a black plastic ring on a white background", "a black and white drawing of a pair of scissors"], "captions_pred_image": ["a 3d rendering of a white plastic object on a gray background", "a 3d model of a tower made out of blocks"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a large house/building structure with a roof.", " a house featuring a pitched roof structure with brick detailing."], "sample_ids": ["82859e4c6d4e4bbea94b6252bef1d398", "5fbd274f897b44fcafa02ee84228debf"], "properties": ["roof, structure, house", "structure, roof, pitch"], "captions_pred_pc": ["a black and white photograph of a metal sculpture", "a black and white illustration of a square with a lot of dots on it"], "captions_pred_image": ["a 3d model of a large white structure", "a 3d model of the roof of a house"], "question": "which structure has a pitched roof", "label": 1}, {"captions": ["a featuring a small island with a mountain, a lake in the middle, and a bird flying above.", " a multicolored cube representing a protein, featuring pink, yellow, red, and green hues."], "sample_ids": ["e51fead89ae64968b2ca7f4ccb6d3b97", "ee7c3113f2754f9cbe8980b1b7cc4eff"], "properties": ["a, bird, lake", "color, shape, color"], "captions_pred_pc": ["a black and white photograph of a cloudy sky", "a black and white pattern of small dots on a white background a black and white pattern of small dots on a white background illustration"], "captions_pred_image": ["a 3d model of a mountain range in black and white", "a 3d model of a piece of fabric"], "question": "which entity is a cube?", "label": 1}, {"captions": ["a featuring a ship, large rock, stone slab, ruined 
building, stone floor, small stone structure, triangular object, and a piece of concrete.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["d83c5a2fd61c4e9f927d1d7b7c9e5aae", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["ruined building, stone floor, small stone structure", "house, table, chair"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a mountain", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of a piece of broken pottery", "a 3d rendering of a small white table with a chair"], "question": "which entity has a table and chair?", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["color, table, ceiling", "roof, structure, greenhouse"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d model of a building with a roof"], "question": "which structure has a greenhouse", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["a box, a cup, a bottle, a jar", "hat, skull, bread"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and 
a plastic container", "a black and white image of a stone sculpture"], "question": "which entity has a skull?", "label": 1}, {"captions": ["a 3d wooden pole with a metal spear handle.", " a large metal building with a roof and truss structure."], "sample_ids": ["1d5cf234576e41f0ba209c5e19d2db47", "b85a99699ccd4bcba213322113bb253d"], "properties": ["- material is wood, metal, metal", "roof, truss, structure"], "captions_pred_pc": ["of a black and white illustration of a vase on a pedestal", "of a metal grate on a white background"], "captions_pred_image": ["a black and white image of an old-fashioned pitchfork", "a 3d model of a long metal fence"], "question": "which entity is made of metal", "label": 1}, {"captions": ["a featuring a black and white sailboat, a black and green bird, a triangular lamp, a helicopter, and a kite.", "a featuring a phone booth, desk, chair, lamp, ladder, and two additional chairs."], "sample_ids": ["a59efdbb28a241b8866bfc094c8c80b2", "7da804ad2b554c9a9915d775afb015d3"], "properties": ["a, bird, lamp", "desk, chair, lamp"], "captions_pred_pc": ["a silhouette of a person standing in the air", "a black and white illustration of a city skyline"], "captions_pred_image": ["a black and white image of a futuristic sculpture", "a 3d rendering of a desk and chair in a room"], "question": "which entity has a desk", "label": 1}, {"captions": [" a red and black striped battery box resembling a radiator, with elements of black and white striped walls.", " of a cherry blossom bonsai tree with pink flowers."], "sample_ids": ["6d4cb53d5953447aaf3c44872cd3ae6f", "037fff0f153c41ea8b9c9392c2e2439a"], "properties": ["color, shape, material", "flower, color, pink"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench royalty free illustration", "for a black and white illustration of a person on a skateboard"], "captions_pred_image": ["a 3d rendering of a black and white building", "a 3d model of a bonsai tree on a pedestal"], 
"question": "which entity has a pink flower?", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", " a small white building with stairs and a white table."], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "e30374c614f54fdb90f35b96b071349d"], "properties": ["color, door, window", "building, stairs, table"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "above a black and white drawing of a cat sitting on top of a letter 'e'"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "a 3d model of a building with a staircase"], "question": "which building has a table?", "label": 1}, {"captions": ["a featuring a white dog, white wolf, white and pink cats, and a pink fish, all with pink eyes.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["5392e72784be41e485bf2d43cf0bee6a", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, white, pink, eyes", "a, material, clay"], "captions_pred_pc": ["a black and white image of a bear's head", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a wolf's head on a white background", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": ["a 3d white cube with windows resembling a building.", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["4a07a5293f024bb0a353954a056ef626", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["- material is white- color is white- texture is textured", "torso, breasts, pattern"], "captions_pred_pc": ["a black and white image of a square made up of small dots", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["a 3d model of a cube", "a 3d model of a woman's chest"], "question": "which entity is a torso?", "label": 1}, {"captions": [" a small black 
and white cube-like object.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["66f9c2c2216e4f81a2aa6c12c08bfb55", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, shape, size", "a, material, clay"], "captions_pred_pc": ["above a black and white image with a square in the center", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d rendering of an object on a white and black striped surface", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" featuring a sandbox, sand bucket, wooden blocks, water container, and a lamp made out of blocks.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["674a36147ffb47059e48abc9fa19d923", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["sandbox, sand bucket, wooden blocks", "lizard, rock, stuffed animal"], "captions_pred_pc": ["for a black and white photo of a basketball hoop", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a brick, a box, and a pile of dirt royalty free 3d model preview no. 
3", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": [" a small house with a yellow roof and chimney.", "a white of a man with arms outstretched."], "sample_ids": ["0056e85a243b47a08ddbcd36816cb6ae", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["roof, yellow, chimney", "image, color, white"], "captions_pred_pc": ["a black and white illustration of a house made up of dots on a white background a black and white illustration of a house made up of dots on a white background royalty free illustration", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d model of a small house royalty-free 3d model preview no.2", "a 3d model of a man with his arms outstretched"], "question": "which image is white", "label": 1}, {"captions": [" the white text \"ekberkaslan\" with a row of white cubes and a numbered box.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["87ee30d475f34b799c24bf7ef3a7b540", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["- color is white- shape is cubes- number is 1", "roof, color, yellow"], "captions_pred_pc": ["a close up of a black and white striped scarf", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d image of the word ebercaskalan on a white background", "a 3d model of a house with a roof"], "question": "which entity has a roof that is yellow?", "label": 1}, {"captions": [" a yellow monster-sphere with teeth.", " a large metal building with a roof and truss structure."], "sample_ids": ["fc5f906bab2c4c36a406ebdc15f58541", "b85a99699ccd4bcba213322113bb253d"], "properties": ["color, shape, texture", "roof, truss, structure"], "captions_pred_pc": ["a black and white illustration of 
a dandelion on a white background dandelion illustration on a white background illustration", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a white ball with teeth", "a 3d model of a long metal fence"], "question": "which entity has a roof and truss structure", "label": 1}, {"captions": [" a small green cannon wheel with a handle.", " of a wooden windmill with a red roof."], "sample_ids": ["97d13db38fa24556afa1eef04fc518e6", "2ad8fca30285483d8b7f602fa078215d"], "properties": ["color, green, handle", "roof, color, red"], "captions_pred_pc": ["of a small black object on a white background", "a white and black image of a snowflake in the shape of a snowflake"], "captions_pred_image": ["a 3d model of a cannon with wheels", "a 3d model of a windmill on a gray background"], "question": "which object has a red roof", "label": 1}, {"captions": [" a house featuring a green roof and red frame.", " a small black house with a green roof, resembling a shed or container."], "sample_ids": ["00d9a408067d46afa127a404f63b4f65", "bdb8e4c36ccb477890fd6ae569ae305c"], "properties": ["color, roof, green, frame, red", "black, roof, green"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white drawing of a square with dots all over it"], "captions_pred_image": ["a 3d model of a building with a metal roof", "a 3d model of a small black building"], "question": "which house has a green roof", "label": 1}, {"captions": [" an ice cream machine, popsicle with two sticks, and a shelf with a computer monitor.", " of a beige and white round soap dish/small bowl."], "sample_ids": ["cb840159fea7436d81eb33bdccad3596", "5414d75e47104589837f3df8b6de6d22"], "properties": ["A, a, a", "beige, white, round"], "captions_pred_pc": ["a black and white illustration of a bench", "of a 3d model of a bracelet 3d model of a bracelet on a white background royalty free illustration 2019"], "captions_pred_image": ["a 3d rendering of a 
white and gray wall mounted shelf", "a white ceramic bowl sitting on top of a gray surface"], "question": "which object is white?", "label": 1}, {"captions": [" featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot.", " a small white building with stairs and a white table."], "sample_ids": ["34ebe81ae93841ca829efd15aee4d8c1", "e30374c614f54fdb90f35b96b071349d"], "properties": ["moss, mushroom, grass", "building, stairs, table"], "captions_pred_pc": ["for a black and white illustration of a cloud on a white background", "above a black and white drawing of a cat sitting on top of a letter 'e'"], "captions_pred_image": ["a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor", "a 3d model of a building with a staircase"], "question": "which entity has a table?", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", "red heart-shaped object."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "a848538c7e4249a4af8e86c477193fa1"], "properties": ["base material is wood, color is red, message is welcome to northwich", "shape, heart, red"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "a black and white image of a skateboard with 
dots"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a heart shaped object"], "question": "which object is red", "label": 1}, {"captions": [" a staircase with a railing, table, and chair, featuring a square ceiling light.", " a small house with a tree, pool, and pond in a green and blue landscape."], "sample_ids": ["51a0fba79bce472a8b827b78a110b77f", "e22489ba182f45cb81f0a83f22abe9bd"], "properties": ["stair, table, chair", "house, tree, pool"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "in 15 words or less a black and white image of a square with dots around it"], "captions_pred_image": ["a 3d model of a staircase in a room", "a 3d model of a house and a tree in a box royalty free 3d model preview no.3"], "question": "which entity has a pool", "label": 1}, {"captions": [" a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole.", " of a small white building or house with a white ceiling and kitchen hood."], "sample_ids": ["b16fb21cda9a4a21a024df749c2304f4", "17b23d23309d4385938ced3ca536a1d1"], "properties": ["roof, ceiling, hole", "building, ceiling, kitchen"], "captions_pred_pc": ["a black and white image of a square with dots on it", "above a black and white drawing of a bathroom with a toilet and sink"], "captions_pred_image": ["a 3d model of a small house and a tree in the foreground", "a 3d model of a white building on a gray background"], "question": "which building has a kitchen?", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a brick building with a roof structure and roof truss."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "84e8acad28664a738df69d719df9e263"], "properties": ["yellow, table, roof", "roof, structure, truss"], "captions_pred_pc": ["a 
black and white drawing of a floor plan", "a black and white polka dots pattern on a white background polka dots pattern on a white background illustration"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a brick building with a roof"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" a small terracotta vase with a white and brown pattern.", " a white castle composed of small cubes."], "sample_ids": ["767c925e15bd4705a5474ba32d4b3607", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["color, pattern, material", "composed of, white, cubes"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a cookie on a white background royalty free illustration", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a large white vase on a grey background", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of small cubes", "label": 1}, {"captions": [" a small building with a roof.", " a small white house with a roof."], "sample_ids": ["a3089017e4c5463d852de65abf61eda5", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["roof, building, small", "roof, color, white"], "captions_pred_pc": ["in 15 words or less a black and white image of a building with dots all over it", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a black and white image of a small house", "a 3d model of a building with a white roof"], "question": "which building has a white roof", "label": 1}, {"captions": [" a y-shaped, multicolored stick resembling the letter k, with red, blue, and green segments.", "a white of a woman with her arms outstretched."], "sample_ids": ["9502d395c40d4ec1abb8764b7ca2b9c9", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["color, shape, letter", "image, color, white"], "captions_pred_pc": ["a black and white illustration of the letter 'f'", "a black and white silhouette 
of a person on a skateboard"], "captions_pred_image": ["a 3d model of the letter 'x'", "a 3d model of a woman with her arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", " a city with buildings, houses, trees, and grass."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["hat, sword, gun", "buildings, houses, grass"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "an image of a pile of trash on the ground"], "question": "which entity has more buildings", "label": 1}, {"captions": [" a cityscape featuring various buildings, trees, an airport runway, and a plane.", " a house featuring a pitched roof structure with brick detailing."], "sample_ids": ["9b45036d3f0342a78db9a25938dc77fc", "5fbd274f897b44fcafa02ee84228debf"], "properties": ["building, tree, plane", "structure, roof, pitch"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake in the shape of a snowflake royalty free illustration", "a black and white illustration of a square with a lot of dots on it"], "captions_pred_image": ["a black and white image of an industrial area with buildings and trucks in the foreground", "a 3d model of the roof of a house"], "question": "which entity has a roof that is pitched?", "label": 1}, {"captions": ["a small yellow and green sphere resembling saturn with a hat.", "a low poly model of a chef with a mustache wearing a white hat."], "sample_ids": ["6811a2f3d1154dccb37d534ae673e673", "98c69b92a63a4e1681b8d0cb49aa764a"], "properties": 
["color, shape, size", "hat, mustache, chef"], "captions_pred_pc": ["a black and white illustration of a dotted circle on a white background a black and white illustration of a dotted circle on a white background royalty free illustration", "in 15 words or less a black and white illustration of a mushroom on a white background"], "captions_pred_image": ["a 3d model of the planet saturn royalty free 3d model preview no.2", "a 3d model of a chef's hat"], "question": "which entity is a chef?", "label": 1}, {"captions": [" of a white and brown sea shell with a hole and small pearls on it, resembling a sea urchin.", " of a black gun/rifle"], "sample_ids": ["411c164757fc4de68dfecb35fa858223", "b596b52b86914c2b9bc4b4cd096621db"], "properties": ["resembles, sea urchin, shell", "color is black, material is metal, type is gun/rifle"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a", "a black and white image of a gun on a white background"], 
"captions_pred_image": ["a 3d model of a sea urchin", "a 3d model of a black gun on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": [" of a wrecked plane, ship, and bird on a pile of rocks with grass.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["b0c703df20154bbf9fd8707c61137fc5", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["plane, ship, bird", "roof, color, yellow"], "captions_pred_pc": ["a black and white watercolor map of the state of ohio", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a black and white photograph of a pile of debris on the ground", "a 3d model of a house with a roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " a small house with a roof and ceiling-mounted air conditioner."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "6965067ea9e34357a7af21a2f078fbac"], "properties": ["bed, desk, window", "roof, air conditioner, house"], "captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white illustration of a window"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d rendering of a small house with a covered porch"], "question": "which entity has a roof", "label": 1}, {"captions": ["s of a rock, boat, plane, and leaf on a stick.", " of a bearded man wearing a green shirt and a hat."], "sample_ids": ["be0884a7ced34b3d92687b6087798a1e", "1e4e5e8133ae48c797facaec724c13a5"], "properties": ["s, stick, leaf", "hat, shirt, bearded"], "captions_pred_pc": ["above a black and white drawing of an object floating in the sky", "of a black and white bracelet on a white background"], "captions_pred_image": ["a black 
and white photograph of a rock on a sandy surface", "a 3d model of a man with a beard"], "question": "which entity has a hat?", "label": 1}, {"captions": [" a small building with a roof.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["a3089017e4c5463d852de65abf61eda5", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["roof, building, small", "house, roof, blue"], "captions_pred_pc": ["in 15 words or less a black and white image of a building with dots all over it", "a black and white illustration of a coffee mug on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a small house", "a 3d model of a small house and barn"], "question": "which building has a blue roof", "label": 1}, {"captions": [" a room featuring a wall with a painting, a hole, and a door.", " a small house with a roof and ceiling-mounted air conditioner."], "sample_ids": ["1d1328346a464d2482463d6d5288e934", "6965067ea9e34357a7af21a2f078fbac"], "properties": ["painting, door, wall", "roof, air conditioner, house"], "captions_pred_pc": ["in one hundred words or less an illustration of an igloo on a white background stock illustration", "a black and white illustration of a window"], "captions_pred_image": ["a black and white photograph of a torn piece of paper in the shape of a bird", "a 3d rendering of a small house with a covered porch"], "question": "which entity has a roof", "label": 1}, {"captions": [" a blue circuit board with electronic components.", " a wooden object, including a board, piece of wood, box, and shelf."], "sample_ids": ["4816a2780af54492b6692fd78347f1ac", "c986212445a1466ca7be7b5ac6bea729"], "properties": ["color, blue, components", "wood, board, shelf"], "captions_pred_pc": ["a black and white drawing of a person sitting on a bench", "a black and white drawing of snowflakes on a white background"], "captions_pred_image": ["a 3d printed circuit board with 
various electronic components", "a 3d rendering of a piece of marble"], "question": "which object is made of wood", "label": 1}, {"captions": [" of a cannon with a wheeled cart.", " of two rocks with ice elements."], "sample_ids": ["bef830ddd37344209fcbf102fca0ef29", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["wheeled, cart, cannon", "image is a rock with ice elements"], "captions_pred_pc": ["above a black and white image of a wheel with two wheels", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a cannon royalty free 3d model", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a black metal shelf with four holes and a laptop on it.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["b3b6f91d939d4193a0090eaabd39eb47", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["black, laptop, shelf", "house, tree, hill"], "captions_pred_pc": ["a close up of a black and white tile on a white background", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d rendering of a black metal shelf", "a 3d model of a house in the middle of a field"], "question": "which entity is situated on a hill", "label": 1}, {"captions": ["a featuring a ship, large rock, stone slab, ruined building, stone floor, small stone structure, triangular object, and a piece of concrete.", " a white sofa, chair, and box."], "sample_ids": ["d83c5a2fd61c4e9f927d1d7b7c9e5aae", "4c59733ebd634594a921b7ace60e4142"], "properties": ["ruined building, stone floor, small stone structure", "sofa, chair, box"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a mountain", "a black and white drawing of a couch with dots"], "captions_pred_image": ["a 3d model of a piece of broken pottery", "a 3d model of a white chair"], "question": "which entity has a sofa", "label": 1}, {"captions": [" 
a set of yellow and black tools, including a magnifying glass, hexagonal keys, and screwdrivers.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["40285a60f32749a8ae38957c7b073fe8", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, yellow, black", "a, material, clay"], "captions_pred_pc": ["a set of three black and white keychains on a white background", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a pair of hexagonal keys and a pair of hexagonal scissors royalty free 3d model preview no 3", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "b896a0898efe4059a776193c02132129"], "properties": ["color, table, ceiling", "- material is stone, metal, concrete"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a snow-covered mountain with blue and white stripes.", " a square tile featuring a blue and green pattern, resembling a city map and a dotted floor mat."], "sample_ids": ["a95e4948175142f39e7d157f801c60c3", "a21baaacd25041a9a50ede5e583ddab9"], "properties": ["color, shape, texture", "pattern, color, shape"], "captions_pred_pc": ["above a black and white image of a spiral in the sky", "a black and white polka dot pattern on a white background"], "captions_pred_image": ["a 3d model of a mountain range in the desert royalty-free 3d model preview no. 
1", "a 3d image of a city map on a black surface"], "question": "which entity has a pattern of blue and green?", "label": 1}, {"captions": ["\"carl meyer logo with various text variations\"", " a female torso with large breasts, featuring a shirt with a hexagonal pattern, a teddy bear, and a snake head."], "sample_ids": ["eb0d2ef8bc364b1492d1e347e5f38d47", "71143d4e7bc94fc4aa73a2d7c86635cd"], "properties": ["color, shape, text", "torso, breasts, pattern"], "captions_pred_pc": ["the logo for carl meyer", "above a black and white illustration of a woman's breasts"], "captions_pred_image": ["the word 'gil heyer' is written in white letters on a gray background", "a 3d model of a woman's chest"], "question": "which entity has a teddy bear?", "label": 1}, {"captions": [" a woman with long wings, legs, hair, and spikes, accompanied by a spider with long legs.", "a white of a man with arms outstretched."], "sample_ids": ["68cf560d0c424ec6a3c58e1b9967508d", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["hair, legs, wings", "image, color, white"], "captions_pred_pc": ["of a white and black bird flying in front of a white background", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d model of a woman with a large spider-like creature on her back", "a 3d model of a man with his arms outstretched"], "question": "which entity is a photograph?", "label": 1}, {"captions": [" an ice cream machine, popsicle with two sticks, and a shelf with a computer monitor.", " a snowy city with buildings and a plane flying overhead."], "sample_ids": ["cb840159fea7436d81eb33bdccad3596", "cc63ceb2b5e84872a1a1f6423de419e2"], "properties": ["A, a, a", "building, plane, city"], "captions_pred_pc": ["a black and white illustration of a bench", "a black and white photo of an airplane on a white background"], "captions_pred_image": ["a 3d rendering of a white and gray wall mounted shelf", "a 3d model of a city in black and white"], "question": 
"which entity has a plane flying over it", "label": 1}, {"captions": [" a yellow boat/submarine with a red arrow and light.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["9b9c31fe4e6b4004a4cb34176f329c04", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["color, shape, light", "door, lock, handle"], "captions_pred_pc": ["a black and white image of a letter 'l' on a white background", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d rendering of a table with an object on it", "a black and white image of a door with a crack in it"], "question": "which entity is a door?", "label": 1}, {"captions": [" a modern office building with a green door, green roof, windows, and blue lights.", " a house with a roof, wooden beams, and chimney."], "sample_ids": ["d6087023095446fbadef1721478373b2", "be1376023c274bdda995d54f3694157f"], "properties": ["door, roof, window", "roof, beams, chimney"], "captions_pred_pc": ["a black and white drawing of a toilet brush", "a black and white drawing of a bathroom with a shower"], "captions_pred_image": ["a 3d model of an apartment building", "a 3d model of a house with a roof"], "question": "which building has a roof made of wood?", "label": 1}, {"captions": ["a pink pixelated 3d pig model with black eyes and a handle.", " a small white building with stairs and a white table."], "sample_ids": ["d9006ea4af304f3c9398339f9fc99fc3", "e30374c614f54fdb90f35b96b071349d"], "properties": ["color, eye, handle", "building, stairs, table"], "captions_pred_pc": ["above a black and white drawing of a room", "above a black and white drawing of a cat sitting on top of a letter 'e'"], "captions_pred_image": ["a 3d model of a small white box", "a 3d model of a building with a staircase"], "question": "which entity has a table?", "label": 1}, {"captions": [" a small white house with stairs and a wall-mounted shelf.", " a wooden shed with a 
gray roof."], "sample_ids": ["10c4ba5b0db4490db9c00c21c94cb41f", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["house, color, white", "roof, color, gray"], "captions_pred_pc": ["above a black and white drawing of a bench", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small white building", "a 3d model of a shed with a gray roof"], "question": "which building has a gray roof", "label": 1}, {"captions": ["s of a boat, bird, paper airplane, and kite flying in the air.", " of a small white building with stairs and a lid."], "sample_ids": ["795cfa41d48a4cfc893ff1981318594d", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["s, boat, bird, airplane, kite", "building, stairs, lid"], "captions_pred_pc": ["above a 3d illustration of a boy standing with his arms outstretched", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a white kite flying in the air against a gray background", "a 3d model of a white box on a gray background"], "question": "which entity has a lid?", "label": 1}, {"captions": ["a 3d object collection featuring a long stick, a grassy hill, a fish, a green and yellow knife, a small green and purple fish, a bird, and a crocodile.", " of a black table, road with white lines, and rectangular ceiling panel."], "sample_ids": ["856e5f6a854d4c608901f1e2b580344c", "bdc9b72d13604f5a8c7bd6ace2ca3177"], "properties": ["a, b, c", "black, road, panel"], "captions_pred_pc": ["above a black and white drawing of a submarine", "a black and white dotted square on a white background"], "captions_pred_image": ["a black and white photograph of a fish on a gray background", "a 3d rendering of an empty road"], "question": "which entity has a black table?", "label": 1}, {"captions": ["a featuring a 
boat, table, chairs, umbrella, and solar panel.", " a furnished room featuring a table, chairs, desk, shelves, and a small kitchen and living area."], "sample_ids": ["0f0eb3a198d341d28f809b6d7634be8a", "9403cf50e8cb44c195b76afd89d0c9fb"], "properties": ["boat, table, chairs, umbrella, solar panel", "furniture, room, kitchen"], "captions_pred_pc": ["a black and white illustration of a boat with an umbrella", "a black and white image of a room with dots all over it"], "captions_pred_image": ["a 3d model of a boat, a table, chairs, and an umbrella", "a 3d rendering of a room with a table, chairs, and shelves"], "question": "which entity has a kitchen?", "label": 1}, {"captions": [" a house with roof trusses and wooden beams on a suspended ceiling.", " a white building, table, and various piles of paper, including a low-poly object."], "sample_ids": ["3c2e3a3670b042069bd8290e2c357702", "515210fb031f4ec89021ee8ce9e432e9"], "properties": ["roof trusses, beams, suspended ceiling", "- building is white, table is white, piles of paper are white"], "captions_pred_pc": ["above a black and white drawing of a building", "a black and white drawing of a piece of paper"], "captions_pred_image": ["a 3d model of a house with a roof in progress royalty free 3d model preview no. 
1", "a 3d model of a snowy landscape"], "question": "which entity has a white building?", "label": 1}, {"captions": [" a silver vintage racing car.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["9d8a1e856251435f9596c031005520bd", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["color, silver, vintage", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white image of a chair with dots all over it", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d model of a vintage racing car", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", " a colorful, wire-framed building structure resembling a cube."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "62b7c7c684044d998fee9ff35beeb79b"], "properties": ["color, red, blue, structure", "color, frame, shape"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "a black and white illustration of a building made up of dots"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d model of a building structure"], "question": "which structure is a cube?", "label": 1}, {"captions": ["a 3d pink spiky spherical flower.", " a toy motorcycle, car, and robot on an orange platform."], "sample_ids": ["039a3fc74e39450883c46acbe2f57476", "7407a108e0354925b83b750339bc03df"], "properties": ["color, shape, texture", "platform, color, orange"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background", "a black and white illustration of a bicycle"], "captions_pred_image": ["a 3d model of a snowflake on a white background", "a 3d model of a motorcycle on a pedestal"], "question": "which entity is a toy?", "label": 1}, {"captions": ["a white hat, plastic cup with a lid, and a bowl.", " a 
staircase with a railing, table, and chair, featuring a square ceiling light."], "sample_ids": ["4a9d79b48eda4ad797a652ee01b1b026", "51a0fba79bce472a8b827b78a110b77f"], "properties": ["hat, cup, bowl", "stair, table, chair"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a funnel", "for a black and white image of a toilet paper holder"], "captions_pred_image": ["a white plastic hat and bowl on a gray background", "a 3d model of a staircase in a room"], "question": "which entity has a table and chair?", "label": 1}, {"captions": [" a witch wearing a black and white hat.", "a white 3d-printed plastic container set with a lid, two small containers, and a hat-like attachment."], "sample_ids": ["1516bf87d6ee47fa9ede71bf77757b29", "1da865c75a5e4a57a17652975dae5474"], "properties": ["color, black, white", "color, white, plastic"], "captions_pred_pc": ["a black and white illustration of a person wearing a wizard's hat and standing next to a snowflake", "a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white 
background a black dots on a white background a black dots on a white background a"], "captions_pred_image": ["a 3d model of a witch wearing a hat and carrying a broom", "a 3d model of a white box, a white lid, and a white container"], "question": "which object is white", "label": 1}, {"captions": [" a clear glass table with metal legs and balls on top.", " a table with pillar-like yellow and white poles, featuring a wooden structure, large tent, glass roof, and a chandelier with numerous glass tubes."], "sample_ids": ["7c2bfa826f274377ac21f48d510848c3", "fa06167d83e54b05bdfbeeae2ca7c8a6"], "properties": ["glass, metal, balls", "table, structure, roof"], "captions_pred_pc": ["a black and white image of a wine glass", "a black and white image of a map with dots"], "captions_pred_image": ["a clear acrylic foosball table", "a 3d model of a building with many pillars"], "question": "which table has a glass roof", "label": 1}, {"captions": [" a small house with a road in front of it.", " a small wooden house."], "sample_ids": ["9ff45258feba4c68bb279efeb829538f", "4cb4dba1237443eb8dc299530fa12521"], "properties": ["house, front, road", "house, material, wood"], "captions_pred_pc": ["above a black and white drawing of a building", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of 
dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots"], "captions_pred_image": ["a 3d model of a small house on a road", "a 3d model of a small cottage"], "question": "which house is made of wood", "label": 1}, {"captions": [" of a gray stereo system with a blue drawer, resembling a printer and computer with a blue screen, featuring a cd player.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["3d4d965e67744415b69ff6aaeb11f420", "b896a0898efe4059a776193c02132129"], "properties": ["color, screen, drawer", "- material is stone, metal, concrete"], "captions_pred_pc": ["above a black and white image of a brush", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a printer on a white background royalty free 3d model no.2", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" of a wooden tv stand with two drawers and handles.", " a white and gold mirror on a wooden easel stand."], "sample_ids": ["6409380e790442e6a5733eb447d4c510", "0d10d734448d4a5d8d07b938c12d9d80"], "properties": ["Drawer, Handle, Wood", "color, white, gold"], "captions_pred_pc": ["a black and white drawing of a line of dots on a white background", "for a black and white image of a shoe on a white background"], "captions_pred_image": ["a 3d rendering of a wooden entertainment center", "a 3d model of a standing mirror on a white background"], "question": "which object is made of wood", "label": 0}, {"captions": [" a house with a blue roof.", " a city with buildings, houses, trees, and grass."], "sample_ids": 
["8ff693cd3ca74f8a901ca259b8b3a7ac", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["roof, color, blue", "buildings, houses, grass"], "captions_pred_pc": ["a black and white drawing of a cross on a white background royalty free illustration", "in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house with a roof", "an image of a pile of trash on the ground"], "question": "which entity has more grass", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", " a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "f6c6e7a65a3e42dfa431b1d984c72f28"], "properties": ["hat, sword, gun", "house, fence, dog"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "above a black and white drawing of a bathroom"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d 
model of a small cabin in the woods 3d model of a"], "question": "which entity has a fence", "label": 1}, {"captions": [" a small white archway structure resembling a building.", " a small village featuring houses, trees, and a winding road."], "sample_ids": ["5ad02458cf394134a902e25001d2ffef", "7acf46c0265d4e39b97ac084852abde8"], "properties": ["structure, building, archway", "houses, trees, road"], "captions_pred_pc": ["for a black and white illustration of a castle on a hill", "in 15 words or less a black and white photo of a mountain landscape"], "captions_pred_image": ["a 3d rendering of a white object on a white surface", "a black and white photograph of a small town"], "question": "which entity has a road?", "label": 1}, {"captions": [" a black castle with stairs and a door.", " a spiral staircase with a railing and wooden floor in a house."], "sample_ids": ["036d34dec6274f6a99d8f9689d19a77d", "40921ffd69db479294554d261daf3035"], "properties": ["door, stairs, color", "floor, railing, staircase"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "above a black and white image of a computer screen"], "captions_pred_image": ["a 3d model of a medieval castle on a white background royalty free 3d model", "a 3d model of a spiral staircase"], "question": "which staircase has a railing", "label": 1}, {"captions": [" a white box with a gold handle, featuring a spring inside and functioning as a kitchen utensil holder or electrical box, with gold details.", "a white teapot with pink flowers and a handle."], "sample_ids": ["ca275639b47a4093b4426d304695af7f", "f6c5e8931d164979a71914127c7e5438"], "properties": ["holder, spring, box", "color, white, handle"], "captions_pred_pc": ["in 15 words a black and white illustration of a square with dots on a white background abstract illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white 
background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white", "a black and white dots on a white background"], "captions_pred_image": ["a 3d model of a plastic container with a spring inside", "a white tea kettle with a black handle and floral design"], "question": "which object has a handle", "label": 1}, {"captions": [" a red and blue metal-framed building structure with a steel house frame.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["a453ac83ef1e4f76bd7978451888d5f5", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["color, material, frame", "a room, a cake, a table"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a 3d model of a metal frame structure", "a 3d rendering of a white room with various items in it"], "question": "which entity has a room?", "label": 1}, {"captions": [" a house featuring a roof, floor plan, small 
bathroom, pool, and ceiling light fixtures.", " of an egyptian sarcophagus."], "sample_ids": ["6c6549e2975a48a1b59ebe2a6562900e", "70aa484af2ab44149a608dd81a6ff459"], "properties": ["floor plan, bathroom, pool", "sarcophagus, material, wood"], "captions_pred_pc": ["the floor plan of the house the floor plan of the house on a white background royalty free illustration", "a black and white circular pattern on a white background"], "captions_pred_image": ["a 3d model of a small house", "a black and white photograph of a sphere with egyptian hieroglyphics on it"], "question": "which object is made of wood", "label": 1}, {"captions": ["green and black ak-47 assault rifle with grenade launcher.", " a small purple plastic chair with holes."], "sample_ids": ["9ffa5ac853354223a7f4f42d532f7633", "fe2bf0f8f5c64dd6bac3e2da0d1b89d0"], "properties": ["color, black, green", "color, plastic, purple"], "captions_pred_pc": ["a black and white image of a cell phone on a white background", "this image may contain clothing apparel accessory purse bag and handbag"], "captions_pred_image": ["a 3d model of an ak 47 assault rifle royalty free 3d model preview no 2", "3d model of a chair royalty free 3d model preview no 3"], "question": "which object is made of plastic", "label": 1}, {"captions": [" of a roman inscription on a stone plaque and paper scroll with writing.", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["0a281f5738b54febb835c052549ba399", "09f2cf267e954c958828325067bcc36a"], "properties": ["inscription, scroll, stone", "island, terrain, rocks"], "captions_pred_pc": ["a black and white photograph of a mirror", "above a black and white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a black and white photograph of a plaque on a stone wall", "a black and white image of a piece of dirt on the ground"], "question": "which entity has more rocks", "label": 1}, {"captions": [" a small house with a roof.", " a 
small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["0d2246e433ce4066b76489f17ba8d694", "09f2cf267e954c958828325067bcc36a"], "properties": ["roof, house, small", "island, terrain, rocks"], "captions_pred_pc": ["a black and white square made up of small dots on a white background", "above a black and white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a 3d model of a house with a triangular roof", "a black and white image of a piece of dirt on the ground"], "question": "which entity is a small island?", "label": 1}, {"captions": [" a white and yellow metal shelving unit with a steel structure, yellow legs, and suspended ceiling system.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["c1a7d264b34841409009b3d5d39d5b99", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["Steel, Color, Yellow", "color is white, yellow, plastic"], "captions_pred_pc": ["a black and white illustration of a building", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a 3d model of a table with multiple tables and chairs", "a white plastic container with a label on it"], "question": "which entity is made of plastic", "label": 1}, {"captions": ["a small 3d purple teapot and elephant.", " of a wooden windmill with a red roof."], "sample_ids": ["4a27592dc8164f709b44446ee11832c0", "2ad8fca30285483d8b7f602fa078215d"], "properties": ["color, shape, material", "roof, color, red"], "captions_pred_pc": ["a black and white 3d illustration of an elephant's head on a white background 3d illustration of an elephant's head on a white background royalty free illustration", "a white and black image of a snowflake in the shape of a snowflake"], "captions_pred_image": ["a white ceramic teapot on a gray background", "a 3d model of a windmill on a gray background"], "question": "which object is made of wood", "label": 1}, {"captions": [" a 
small house on a hill in a field.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["bd873071252047d38160c4a5fdd2c1b7", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["house, hill, field", "roof, color, yellow"], "captions_pred_pc": ["a black and white photograph of a piece of paper", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a small house", "a 3d model of a house with a roof"], "question": "which house has a yellow roof", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a small white barn with a metal roof."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "4ca3342a96824684845f7d0e062ab176"], "properties": ["yellow, table, roof", "roof, metal, white"], "captions_pred_pc": ["a black and white drawing of a floor plan", "in 15 words or less a black and white illustration of a house made of dots"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a barn"], "question": "which entity has a roof made of metal", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", "white of a rhino head with horns."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "8481aade84de47cab1a9accf8067e678"], "properties": ["resembles, octopus, squid, spider, robot", "image, rhino, head"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "of a penguin skull in black and white"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "rhino head 3d 
model royalty free 3d model preview no 3"], "question": "which image shows a rhino head?", "label": 1}, {"captions": ["a 3d object collection featuring a long stick, a grassy hill, a fish, a green and yellow knife, a small green and purple fish, a bird, and a crocodile.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["856e5f6a854d4c608901f1e2b580344c", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["a, b, c", "throne, stairs, tree"], "captions_pred_pc": ["above a black and white drawing of a submarine", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a black and white photograph of a fish on a gray background", "a 3d model of a throne with a tree on it"], "question": "which entity has a throne", "label": 1}, {"captions": [" of a white spiral, earbuds, ceiling light, and silver ring with black and white scissors and design.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["c69f60b389124ad9b4f81c64ec332054", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["earbuds, light, ring", "house, fence, playground"], "captions_pred_pc": ["a black and white drawing of a needle and thread", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a black and white illustration of a pair of sunglasses and a pair of scissors next to each other on a white background", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" of a pile of metal, torn and shredded paper, rocks, and a decayed animal.", " of a white and brown sea shell with a hole and small pearls on it, resembling a sea urchin."], "sample_ids": ["c117f1923cad4ecf9df61b6e3d633374", "411c164757fc4de68dfecb35fa858223"], "properties": ["a 
pile of metal, torn and shredded paper, rocks, and a decayed animal", "resembles, sea urchin, shell"], "captions_pred_pc": ["a black and white map of germany on a white background", "in 15 words or less a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a"], "captions_pred_image": ["a 3d model of a crumpled piece of paper on a white background", "a 3d model of a sea urchin"], "question": "which entity is a shell?", "label": 1}, {"captions": ["a featuring a ship, large rock, stone slab, ruined building, stone floor, small stone structure, triangular object, and a piece of concrete.", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["d83c5a2fd61c4e9f927d1d7b7c9e5aae", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["ruined building, stone floor, small stone structure", "box, paper clip, lock"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a mountain", "above a black and white image of a clock in the shape of a 
spiral"], "captions_pred_image": ["a 3d model of a piece of broken pottery", "a 3d model of a stapler with a staple in it royalty free 3d model no."], "question": "which entity has a lock?", "label": 1}, {"captions": ["a white teddy bear, cloud, skull, octopus, and bird in various positions on a gray background.", " of a cherry blossom bonsai tree with pink flowers."], "sample_ids": ["dd5849aced0443b1b4b38d413f7e06c4", "037fff0f153c41ea8b9c9392c2e2439a"], "properties": ["background, color, white", "flower, color, pink"], "captions_pred_pc": ["a black and white image of a cat's head", "for a black and white illustration of a person on a skateboard"], "captions_pred_image": ["a 3d model of an animal skull in white on a gray background", "a 3d model of a bonsai tree on a pedestal"], "question": "which entity has a pink flower?", "label": 1}, {"captions": [" a medieval stone castle with walls and stairs.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["10bc1bbec4c045f9b15fc2156b5e32ee", "97e000ff41094665afd94ea565da8b13"], "properties": ["wall, stairs, castle", "roof, material, wood"], "captions_pred_pc": ["a castle in the snow a castle in the snow illustration on a white background royalty free illustration", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of 
the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the great wall of china 3d model of the", "a 3d model of the roof of a building"], "question": "which structure is made of wood", "label": 1}, {"captions": [" a large bridge over a highway, with an airport and train station nearby.", " a small white building with stairs and a white table."], "sample_ids": ["b348fddc913f47df93cf35db302427d0", "e30374c614f54fdb90f35b96b071349d"], "properties": ["location, highway, train station", "building, stairs, table"], "captions_pred_pc": ["a black and white photo of a small square on a white background", "above a black and white drawing of a cat sitting on top of a letter 'e'"], "captions_pred_image": ["an aerial view of a city with a highway in the foreground and buildings in the background", "a 3d model of a building with a staircase"], "question": "which entity is a building?", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", "a 3d wooden sign featuring various texts, including \"resin park,\" \"shabaab corporation,\" and \"johnson sammons.\""], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "34a49861e7744acfb71de471a755e917"], "properties": ["color, white, black, white", "text, material, shape"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "a black and white image of a metal bar with text on it"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d rendering of a marble slab with text on it"], "question": "which entity is a sign?", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", " a house with a wooden-framed roof structure."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", 
"ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["a knife, blade, handle", "roof, material, wood"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "a black and white drawing of a staircase"], "captions_pred_image": ["a piece of white plastic on a gray background", "a 3d model of a building with a roof"], "question": "which object is made of wood", "label": 1}, {"captions": ["a white featuring a toilet, robot, kitchen appliance, golf cart, and suitcase.", " a multicolored cube representing a protein, featuring pink, yellow, red, and green hues."], "sample_ids": ["eddc90c61d6d40a5bc1c1830e58fd568", "ee7c3113f2754f9cbe8980b1b7cc4eff"], "properties": ["color, toilet, robot", "color, shape, color"], "captions_pred_pc": ["a close up of a plastic sponge on a white background", "a black and white pattern of small dots on a white background a black and white pattern of small dots on a white background illustration"], "captions_pred_image": ["a white 3d model of a toilet seat with a hose attached to the back of it", "a 3d model of a piece of fabric"], "question": "which entity is a cube?", "label": 1}, {"captions": [" a small building with stairs and a glass floor, featuring a square table and a black square ceiling light.", " a wooden table and bench with a deer head and branch on it."], "sample_ids": ["8aaad713b8834739b008ccf2f3d86cce", "857d5391612349f4ae6cd854a1ec96de"], "properties": ["floor, table, light", "table, bench, deer"], "captions_pred_pc": ["above a black and white photograph of a window", "a black and white drawing of a table and chairs"], "captions_pred_image": ["a black and white 3d model of a staircase on a platform", "a black and white image of a bench and table with a deer's head on the table"], "question": "which table is made of wood", "label": 1}, {"captions": ["a featuring a sailboat with a hook, a bird suspended in the air, and a pair of scissors.", " of a meat skewer with a small piece of bread and a sausage 
on a stick."], "sample_ids": ["f57ae66555d34349aeadc38b33f8f267", "1728f2cb8eca4080af02b22262ff45d5"], "properties": ["a, bird, hook", "meat, bread, sausage"], "captions_pred_pc": ["of a 3d scan of a person's torso and limbs", "a black and white image of a brush on a white background"], "captions_pred_image": ["a black and white photo of a kite flying in the sky", "an image of a small white object on a gray background"], "question": "which entity is a food?", "label": 1}, {"captions": ["a 3d white cube featuring various text and logos, including \"gypsy stribes,\" \"guinea pig studios,\" \"guillaume pi sodds,\" \"happy studios,\" and \"guillem pie sos.\"", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["5d08c34bfb2c4c9b9538e24d68761331", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["- material is plastic- color is white- shape is cube", "mountainous, landmass, state"], "captions_pred_pc": ["of a black and white photo of a person sitting on a bench", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d image of a cube with the word 'stories' written on it", "a 3d model of a piece of paper"], "question": "which entity is a mountainous landmass", "label": 1}, {"captions": ["a featuring a green and white umbrella, flying dragon, cloth, sword, bike, insect, and a green and black creature with a bicycle with two legs.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["ee85486b41c1442aaf35fc2563381ad0", "97e000ff41094665afd94ea565da8b13"], "properties": ["color, dragon, cloth", "roof, material, wood"], "captions_pred_pc": ["a 3d rendering of an alien toy on a white background royalty free 3d illustration", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d model of a black kite flying in the sky royalty free 3d model preview no. 
3", "a 3d model of the roof of a building"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" of colorful cubes on a round table with a sphere featuring a rainbow stripe.", " a table with pillar-like yellow and white poles, featuring a wooden structure, large tent, glass roof, and a chandelier with numerous glass tubes."], "sample_ids": ["2cb2483cfe4c41bebcbc02c1cbe10ab4", "fa06167d83e54b05bdfbeeae2ca7c8a6"], "properties": ["color, table, sphere", "table, structure, roof"], "captions_pred_pc": ["a black and white illustration of a group of cubes floating in the air", "a black and white image of a map with dots"], "captions_pred_image": ["a 3d rendering of a group of cubes on a circular surface", "a 3d model of a building with many pillars"], "question": "which table has a wooden structure", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a white plastic box/tray with a hole in the middle."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "04f8bfad8ad14795aced8a83ea30ca60"], "properties": ["resembles, octopus, squid, spider, robot", "color is white, material is plastic, shape is box"], "captions_pred_pc": ["a black and white illustration of a jellyfish", "a black and white image of a rectangular tray on a white background"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a 3d model of a white plastic tray"], "question": "which object is white", "label": 1}, {"captions": [" of a toy mushroom character with a white and brown head.", " of a white supreme logo t-shirt, low poly design."], "sample_ids": ["ae8a73809d4647c09cc82f403e47de1d", "bea8441c08d94366b96b53775391d8e6"], "properties": ["color, head, white and brown", "color, white, logo"], "captions_pred_pc": ["a black and white illustration of a butterfly sitting on a dandelion stock illustration", "for a black and white 
image of a shirt with dots"], "captions_pred_image": ["a 3d model of a gray and white cartoon character", "a 3d model of a white t-shirt with a supreme logo"], "question": "which entity has a white logo?", "label": 1}, {"captions": [" a staircase with a railing, table, and chair, featuring a square ceiling light.", " a wooden table with a plant on it."], "sample_ids": ["51a0fba79bce472a8b827b78a110b77f", "3170a8fc0ebf4d71ab19c723be68987f"], "properties": ["stair, table, chair", "table, plant, wood"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "a 3d model of the molecule on a white background a 3d model of the molecule on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a staircase in a room", "a side table with a potted plant sitting on top of it"], "question": "which table has a plant on it", "label": 1}, {"captions": [" a house with roof trusses and wooden beams on a suspended ceiling.", " a white castle composed of small cubes."], "sample_ids": ["3c2e3a3670b042069bd8290e2c357702", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["roof trusses, beams, suspended ceiling", "composed of, white, cubes"], "captions_pred_pc": ["above a black and white drawing of a building", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a house with a roof in progress royalty free 3d model preview no. 
1", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of small cubes", "label": 1}, {"captions": [" of a small blue and green gazebo with a table and chairs.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["0a3d553ed5d54c9794494af4f7a7e1c6", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["color, gazebo, table", "house, fence, playground"], "captions_pred_pc": ["a black and white illustration of a gazebo in the middle of a field of polka dots stock photography \u00a9 2018 iStock", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of a small gazebo with a fountain in the center", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" a house featuring a wooden roof truss structure and ceiling with wood beams.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["990f06da2ba4488da8371f68da6b4523", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["roof truss, beams, structure", "house, roof, wooden"], "captions_pred_pc": ["a black and white illustration of a staircase in the shape of the letter 'l'", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a house with a wooden roof", "a black and white photograph of a birdhouse"], "question": "which house has a wooden roof", "label": 0}, {"captions": [" a house with a green roof and lawn.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["roof, green, lawn", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "for a black and white image of an object on a white background"], 
"captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker", "label": 1}, {"captions": ["a collection featuring a human skull, jaw with teeth, foot, moose skull, and bones with two horns.", " a staircase with a railing, table, and chair, featuring a square ceiling light."], "sample_ids": ["a44f18a8950b477eba11ba5120e95c38", "51a0fba79bce472a8b827b78a110b77f"], "properties": ["collection, moose, skull", "stair, table, chair"], "captions_pred_pc": ["a black and white illustration of the letter 'v'", "for a black and white image of a toilet paper holder"], "captions_pred_image": ["a 3d model of a human skull with missing teeth royalty-free 3d model preview no.", "a 3d model of a staircase in a room"], "question": "which entity is a staircase?", "label": 1}, {"captions": [" a house with a purple roof and glass block structure.", " a small white bookcase-like building with stairs and a light fixture."], "sample_ids": ["e8ac7de076e54f07ace1a0ead07f6f57", "5f99eb9d1f1e4d57b5690446f832c841"], "properties": ["roof, color, purple", "building, color, white"], "captions_pred_pc": ["a black and white image of a fire hydrant on a white background fire hydrant on a white background royalty free illustration", "in 15 words or less a black and white image of the letter 'f' made up of dots"], "captions_pred_image": ["a 3d model of a building with a glass roof", "a 3d model of a bookshelf on a white background"], "question": "which building is white", "label": 1}, {"captions": ["a featuring a futuristic chair, a black cat with a sword, a person holding an umbrella, and a black and blue dragon, airplane, and helicopter.", " a damaged room with destroyed furniture, featuring a kitchen, bathroom, and broken window."], "sample_ids": ["4df70180f2ea400782d2e2de76063894", "06dd6456dc244a51b6b6e1c8524820de"], "properties": ["color, black, 
blue", "room, furniture, window"], "captions_pred_pc": ["a 3d illustration of a girl in a dress 3d illustration of a girl in a dress, isolated on a white background royalty free stock illustration", "above a black and white drawing of a person sitting on a bench"], "captions_pred_image": ["a black and white 3d model of a person holding a sword in the shape of a paper airplane", "a 3d image of a room with a person in it"], "question": "which entity has a damaged window", "label": 1}, {"captions": ["a featuring a lamp, harp, white bowl, and white curved wall.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["55bcec23e1b34f0d9d748b4dcc3ea123", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["lamp, harp, bowl", "Wheels, laptop, robot"], "captions_pred_pc": ["a black and white illustration of a curved line", "a black and white drawing of a cell phone"], "captions_pred_image": ["a 3d model of a harp in a white room", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" of a machine gun.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["2332041c12f640e6a9ce432e6e278455", "bf18bfd89efd43389781050230467d58"], "properties": ["gun, barrel, caliber", "Lights, number, five"], "captions_pred_pc": ["a silhouette of a machine gun on a white background", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a machine gun royalty free 3d model preview no 2", "a white chandelier with five white shades"], "question": "which entity has a smaller number of lights", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", "a white of a woman with her arms outstretched."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["bed, desk, window", "image, color, white"], 
"captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a 3d model of a woman with her arms outstretched"], "question": "which entity is a photograph?", "label": 1}, {"captions": [" a small island featuring a large building, trees, and a house on a hill, surrounded by a forest.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["d557c62e9be741a6b0f6b204d11a9c6f", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["house, hill, forest", "camera, speaker, ceiling fan"], "captions_pred_pc": ["above a black and white illustration of a small island in the middle of a body of water", "for a black and white image of an object on a white background"], "captions_pred_image": ["a black and white image of a small island in the middle of a body of water", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" a white dragon with red horns and eyes.", " a stone, wood, rock, sliced bread, and a skull with a blue hat."], "sample_ids": ["b7efa835186c4084b4d6ca2479af78fb", "0169af65ffc64bbf8e2fe6c6de08d485"], "properties": ["color, horns, eyes", "hat, skull, bread"], "captions_pred_pc": ["above a black and white image of a sculpture made up of dots", "a black and white illustration of a skull in the shape of dots"], "captions_pred_image": ["a 3d model of a goat's head with horns", "a black and white image of a stone sculpture"], "question": "which entity has a skull?", "label": 1}, {"captions": ["a white ceramic vase with the words 'happy' and 'fish' written on it.", " of a rock with a screw and a nail sticking out of it."], "sample_ids": ["243cd2c469984313b1522dca099eefd3", "b0427ff55c4346e293c1241c71d30528"], "properties": ["color, white, material, ceramic", "a, rock, nail"], "captions_pred_pc": 
["a black and white image of a vase in the shape of a fish", "above a black and white illustration of a bird sitting on top of a rock"], "captions_pred_image": ["a white vase on a grey background", "a 3d image of a rock with a screw in it"], "question": "which object is made of a rock?", "label": 1}, {"captions": [" a multicolored metal building structure with a roof.", "a pair of black armored warriors with swords, spears, and cloaks, including a female warrior and a dark knight."], "sample_ids": ["22483891fd124baca3bbc6a6a49adc9c", "46511f6f0fd04198b4005c159ac8ae40"], "properties": ["color, roof, structure", "cloak, sword, spear"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "a black and white photo of a pair of snowflakes on a white background snowflakes on a white background royalty free stock photography"], "captions_pred_image": ["a 3d model of a barn structure", "a 3d model of a pair of armored knights standing next to each other"], "question": "which entity is a pair?", "label": 1}, {"captions": ["3d snowman model with a wooden stick.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["44f41039246a4df59027c38023d5a576", "b896a0898efe4059a776193c02132129"], "properties": ["- material is wood - color is white - shape is 3d", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white 
background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d snowman royalty-free 3d model preview", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a red and black jellyfish, flower, crab, and bomb.", " a staircase with a glass railing and a small white table, featuring a ceiling light."], "sample_ids": ["2cc8ee5ed7e74a08aec2f503ddc49d25", "10b899daca25493cba6bfffbbe7990fe"], "properties": ["color, shape, and size", "railing, glass, table"], "captions_pred_pc": ["a 3d sculpture of a girl in a dress holding an umbrella", "above a black and white photograph of a cell phone"], "captions_pred_image": ["a black and white image of a group of small objects floating in the air", "a 3d rendering of a staircase with a glass railing"], "question": "which entity has a glass railing", "label": 1}, {"captions": [" a brown couch with peeling paint and tape on it.", " a house with a roof and beams."], "sample_ids": ["8da7e0d122f544e2862b4e592988e183", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["brown, paint, tape", "roof, beams, house"], "captions_pred_pc": ["above a black and white image of a couch with a leopard print pattern", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a couch royalty free 3d model preview no.2", "a 3d model of a building with a roof"], "question": "which 
object has a roof?", "label": 1}, {"captions": [" a small house with stairs and a roof.", " a small white house with stairs and a spiral staircase, featuring a white table and ceiling light."], "sample_ids": ["e9305c80010f4e3b9de9789f01a9bee5", "e9e1cc7fae22458197a61f43a9c355f4"], "properties": ["roof, stairs, house", "house, staircase, table"], "captions_pred_pc": ["above a black and white image of a square with dots all over it", "above a black and white photograph of a dog in a frame"], "captions_pred_image": ["a 3d rendering of a podium on a wooden floor", "a 3d model of a small house with a spiral staircase"], "question": "which house has a spiral staircase", "label": 1}, {"captions": ["a low poly of a tree with red apples and a green cactus with red dots.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["fd7765e391cd49ccbc72891d90850cdb", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, red, apples, tree, cactus, dots", "island, terrain, water"], "captions_pred_pc": ["a black and white illustration of a snowflake on a white background", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a group of geometric shapes", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", " a wooden staircase with a red and white railing and a red arrow."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "bb8bb4c9972d4b718b8bbd3ed5fdd14d"], "properties": ["base material is wood, color is red, message is welcome to northwich", "arrow, red, white"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "above a black and white image of a square with a white cross in the center"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a 
spiral staircase"], "question": "which object has a red arrow?", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a multicolored metal building structure with a roof."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "22483891fd124baca3bbc6a6a49adc9c"], "properties": ["roof truss, insulation, suspended ceiling", "color, roof, structure"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "of a black and white photo of a bike on a white background"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a barn structure"], "question": "which structure has a roof", "label": 1}, {"captions": ["a chocolate cake with a mushroom on top, displayed on a stand.", " a small house with a blue roof, a door, and a pool."], "sample_ids": ["728e8c8600584eacae80208bba7eece4", "40c52c2d278345c5b4e8d00a991271dc"], "properties": ["chocolate, mushroom, cake", "door, roof, pool"], "captions_pred_pc": ["a black and white image of a toilet paper holder", "of a black and white photo of a window with fringes"], "captions_pred_image": ["a mushroom on top of a cake royalty free 3d model preview", "a 3d model of a small house"], "question": "which entity has a door?", "label": 1}, {"captions": ["a featuring a robot, a black and white box, a man with a suitcase, a door, a large white building, and a white shelf.", " a staircase with a railing, table, and chair, featuring a square ceiling light."], "sample_ids": ["4be12bf79ead47a68cd67fc01a6e5c8c", "51a0fba79bce472a8b827b78a110b77f"], "properties": ["a, door, suitcase", "stair, table, chair"], "captions_pred_pc": ["a black and white drawing of a dog in a cage", "for a black and white image of a toilet paper holder"], "captions_pred_image": ["a 3d model of a robot standing in front of an open door", "a 3d model of a staircase in a room"], "question": "which entity has a table?", "label": 1}, {"captions": [" 
featuring a christmas train track, snowman with santa claus, mini golf course, and lego race track in a lego city setting.", " a small wooden cabinet with a drawer, doubling as a bedside table."], "sample_ids": ["9d03ee6c9fcc458e9485aa7cb0d1f4cb", "3755f3c19ae549c4bf708462db1b2581"], "properties": ["a, city, train", "Cabinet, drawer, wood"], "captions_pred_pc": ["a black and white photograph of a piece of paper", "a black and white image of a square with dots all over it"], "captions_pred_image": ["a 3d model of a toy train set on a table", "a 3d model of a wooden box with a lid"], "question": "which object is made of wood", "label": 1}, {"captions": ["a 3d low poly model of an orange with a green leaf and stem.", " a wooden object, including a board, piece of wood, box, and shelf."], "sample_ids": ["cdd918943a124d3f94859d9f67ab8e3e", "c986212445a1466ca7be7b5ac6bea729"], "properties": ["3d, low, poly", "wood, board, shelf"], "captions_pred_pc": ["a black and white illustration of a hexagonal shape with dots", "a black and white drawing of snowflakes on a white background"], "captions_pred_image": ["a 3d model of a paper airplane flying in the air", "a 3d rendering of a piece of marble"], "question": "which object is made of wood", "label": 1}, {"captions": ["a featuring a graffiti-covered train, bench, wall, and skateboard.", " a house with a yellow roof, wooden beams, and yellow frames."], "sample_ids": ["6de9fcac063d45df9424decdc215b379", "703dce44052e48cfb024bceb08141554"], "properties": ["graffiti, bench, wall", "roof, beams, frames"], "captions_pred_pc": ["for a black and white image of a boat", "a black and white drawing of a boat"], "captions_pred_image": ["a graffiti-covered wall in a black and white photograph", "a 3d model of a wooden structure"], "question": "which entity has a yellow roof", "label": 1}, {"captions": [" a spiral staircase with a railing in a small building.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": 
["28cae056856c4a8ba9d1a6af5355f831", "6b745457e06840119058883b35f78f58"], "properties": ["staircase, railing, building", "roof, color, blue"], "captions_pred_pc": ["a black and white photograph of a light switch", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a staircase in a white room", "a 3d model of a house with a steeple on top"], "question": "which building has a blue roof", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", "a featuring white and red cubes, and a pink and white chair."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "f2c44a82ba744ba8b93e9a1c2272c117"], "properties": ["color, material, structure", "color, white, red, pink"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white illustration of a house made of dots"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d model of a white structure with stairs"], "question": "which entity is a chair?", "label": 1}, {"captions": ["a featuring a boat, table, chairs, umbrella, and solar panel.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a vacuum cleaner."], "sample_ids": ["0f0eb3a198d341d28f809b6d7634be8a", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["boat, table, chairs, umbrella, solar panel", "Wheels, laptop, robot"], "captions_pred_pc": ["a black and white illustration of a boat with an umbrella", "a black and white drawing of a cell phone"], "captions_pred_image": ["a 3d model of a boat, a table, chairs, and an umbrella", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": [" of a wooden cabinet with drawers and filing features.", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["abbc90bbd5474f73b16482ccd10e07ec", 
"2b0896f810074399a5ae7d6dbab8c330"], "properties": ["Cabinet, Drawers, Filing", "- material is wood, rusty, horned"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white wooden chest of drawers", "3d model of a plague doctor's mask"], "question": "which entity is made of wood", "label": 1}, {"captions": ["a 3d object featuring a white door, box, wall with hooks, plastic bucket, sheet of paper, curved wall, and paper with a hole.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["cbbcf78010e34fa9b2e963452d081eb7", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["a, box, wall", "house, fence, playground"], "captions_pred_pc": ["a black and white illustration of a flower", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d rendering of a white wall with two hooks hanging from it", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" a large house/building structure with a roof.", " a building with a metal and wooden pole structure."], "sample_ids": ["82859e4c6d4e4bbea94b6252bef1d398", "e2e2ab4474b84f33809979da457eedd9"], "properties": ["roof, structure, house", "structure, material, pole"], "captions_pred_pc": ["a black and white photograph of a metal sculpture", "a black and white illustration of a line of dots on a white background"], "captions_pred_image": ["a 3d model of a large white structure", "a 3d model of a structure with multiple tables and chairs"], "question": "which structure is made of metal and wooden poles", "label": 1}, {"captions": [" a small village featuring houses, trees, and a winding road.", " a house with a wooden-framed roof structure."], 
"sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["houses, trees, road", "roof, material, wood"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "a black and white drawing of a staircase"], "captions_pred_image": ["a black and white photograph of a small town", "a 3d model of a building with a roof"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" a building with a purple roof, featuring a ceiling with wooden beams.", " a multicolored cube representing a protein, featuring pink, yellow, red, and green hues."], "sample_ids": ["b0cb5cd2fdca4bd5bdf96dd5c0cc13b5", "ee7c3113f2754f9cbe8980b1b7cc4eff"], "properties": ["roof, purple, beams", "color, shape, color"], "captions_pred_pc": ["the letter l made of snowflakes on a white background royalty free illustration", "a black and white pattern of small dots on a white background a black and white pattern of small dots on a white background illustration"], "captions_pred_image": ["a 3d model of the roof structure royalty free 3d model no.", "a 3d model of a piece of fabric"], "question": "which entity is a cube?", "label": 1}, {"captions": [" a set of yellow and black tools, including a magnifying glass, hexagonal keys, and screwdrivers.", "a pile of mint green soap cubes."], "sample_ids": ["40285a60f32749a8ae38957c7b073fe8", "edd35e0657b640b1b8fcf86942e1a9e5"], "properties": ["color, yellow, black", "color, mint, soap"], "captions_pred_pc": ["a set of three black and white keychains on a white background", "a black and white illustration of a group of dots on a white background"], "captions_pred_image": ["a 3d model of a pair of hexagonal keys and a pair of hexagonal scissors royalty free 3d model preview no 3", "a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration 
of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background 3d illustration of a pile of white cubes on a gray background"], "question": "which object is made of soap", "label": 1}, {"captions": [" of a wooden cabinet with drawers and filing features.", " a small white house with a staircase and a window."], "sample_ids": ["abbc90bbd5474f73b16482ccd10e07ec", "9eb88d17310d42dda9e17883e9922525"], "properties": ["Cabinet, Drawers, Filing", "house, staircase, window"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white wooden chest of drawers", "a 3d rendering of a small room with a staircase"], "question": "which entity is a building?", "label": 1}, {"captions": [" of a multi-colored spooling machine with wires and circuit board, featuring a lamp and shelf with toys.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["0e3f5cc16806492b948d41a748819ce3", "bd7aab78974643f5a0660c699daf8eb3"], "properties": ["color, shape, material", "roof, color, yellow"], "captions_pred_pc": ["a black and white image of a decorative tile", "a black and white drawing of a room"], 
"captions_pred_image": ["a 3d rendering of an electronic device on a white surface", "a 3d model of a table and chairs on a white background"], "question": "which entity has a roof that is yellow", "label": 1}, {"captions": ["a featuring a black and white sailboat, a black and green bird, a triangular lamp, a helicopter, and a kite.", " a small purple plastic chair with four legs."], "sample_ids": ["a59efdbb28a241b8866bfc094c8c80b2", "1bb40ec897884b788dc0a2dac090f347"], "properties": ["a, bird, lamp", "color is purple, material is plastic, number of legs is four"], "captions_pred_pc": ["a silhouette of a person standing in the air", "of a black and white image of a baby carrier"], "captions_pred_image": ["a black and white image of a futuristic sculpture", "a 3d model of a white plastic chair"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["color, door, window", "house, fence, playground"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "a 3d model of a room with a lot of wires"], "question": "which house has a fence", "label": 1}, {"captions": [" a rusted metal barrel with a yellow and red warning sign and stripe on it.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["5a49ad82ef7a4d33badea2261720f518", "c3a82df41875402285608ef13a55df57"], "properties": ["rusty, warning, metal", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white drawing of dots on a white background a black and white 
drawing of dots on a white background royalty free illustration", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a black and white photograph of a barrel", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["a knife, blade, handle", "color is white, yellow, plastic"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a piece of white plastic on a gray background", "a white plastic container with a label on it"], "question": "which object is white", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", "star wars stormtrooper "], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["texture, spikes, eyes", "a, color, white"], "captions_pred_pc": ["a black and white drawing of a flower", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 
3", "a 3d model of a star wars stormtrooper"], "question": "which entity is white", "label": 1}, {"captions": [" of a white tiled floor with a ceiling light and small holes.", " a white castle composed of small cubes."], "sample_ids": ["9906caefe141465990aacb312e1025f0", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["light, floor, ceiling", "composed of, white, cubes"], "captions_pred_pc": ["a black and white polka dot pattern on a white background", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a white tile floor", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of white cubes", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", "a 3d wooden sign featuring various texts, including \"resin park,\" \"shabaab corporation,\" and \"johnson sammons.\""], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "34a49861e7744acfb71de471a755e917"], "properties": ["texture, spikes, eyes", "text, material, shape"], "captions_pred_pc": ["a black and white drawing of a flower", "a black and white image of a metal bar with text on it"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 
3", "a 3d rendering of a marble slab with text on it"], "question": "which entity is a sign?", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " a multicolored cube representing a protein, featuring pink, yellow, red, and green hues."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "ee7c3113f2754f9cbe8980b1b7cc4eff"], "properties": ["ceiling, light, desks", "color, shape, color"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "a black and white pattern of small dots on a white background a black and white pattern of small dots on a white background illustration"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d model of a piece of fabric"], "question": "which entity is a cube?", "label": 1}, {"captions": [" of a blue tarp, flower, small island with a boat and phone, and a tent with a blue blanket.", " a house with a wooden-framed roof structure."], "sample_ids": ["94704d86c22c4bdfb86ac24979926066", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["blue tarp, flower, small island", "roof, material, wood"], "captions_pred_pc": ["above a 3d rendering of a fish in the air", "a black and white drawing of a staircase"], "captions_pred_image": ["a black and white image of a piece of paper on the ground", "a 3d model of a building with a roof"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", " a small house with a roof."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "9578e8de15ec44ce802072aaa4df3910"], "properties": ["frame, roof, trusses", "roof, house, small"], "captions_pred_pc": ["a black and white drawing of a metal grate", "above a black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and 
white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a small house"], "question": "which house has a roof", "label": 1}, {"captions": ["a 3d white object resembling a sphere, frog, egg, hat, and shell.", " a small white house with windows and a black lid."], "sample_ids": ["0a8e0b95d8ce43ee9159ad01d925aad8", "4b40af369c1149949f5ccb68becd8430"], "properties": ["shape is sphere, color is white, material is plastic", "white, windows, lid"], "captions_pred_pc": ["a black and white illustration of a sponge in the shape of a sponge sponge black and white illustration of a sponge in the shape of a sponge royalty free illustration", "above a black and white image of dots on a white background"], "captions_pred_image": ["a 3d sculpture of an apple on a white background", "a 3d model of 
a white house with three windows"], "question": "which object is white", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "97e000ff41094665afd94ea565da8b13"], "properties": ["resembles, toy, bookshelf", "roof, material, wood"], "captions_pred_pc": ["a black and white image of a book cover", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "a 3d model of the roof of a building"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a futuristic space station featuring a bench, computer desk with a laptop, small coffee machine, printer, and computer monitor.", " a large house with a roof on a platform."], "sample_ids": ["9b8e2f9070b24956a343a01a5fabdf03", "cb3e09a301b746918a682a595037c7f7"], "properties": ["computer desk, laptop, monitor", "roof, platform, house"], "captions_pred_pc": ["a black and white silhouette of a traditional japanese gate", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of a futuristic room with a bench", "a 3d model of a small house"], "question": "which entity has a roof", "label": 1}, {"captions": [" a building with yellow lines.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["f18e34286cf54876874f55ecc9018492", "06a1c233fb444830b577aa06e2c01294"], "properties": ["color, yellow, lines", "house, tree, hill"], "captions_pred_pc": ["a black and white drawing of a map", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a drawing of an airplane flying over a city", "a black and white image of a house in the middle of a field"], "question": "which entity has more trees", "label": 1}, 
{"captions": ["purple speech bubble and circle", "a low-poly of a white, shattered sphere."], "sample_ids": ["382b2c7ab4e14027a89be90966795733", "94119660e7054fc5b7baa68a4e39968c"], "properties": ["color, shape, size", "sphere, color, texture"], "captions_pred_pc": ["a black and white speech bubble on a white background", "a black and white illustration of a dandelion on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white"], "captions_pred_image": ["a gray circle on a gray background with a small white dot in the center of the circle", "a 3d model of a cracked egg"], "question": "which entity is a sphere?", "label": 1}, {"captions": [" a small, rusty toy character with a hat, dart, dartboard, and leaves, standing on a rock.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["a24872444e5c468a9264d503a9ca7276", "c3a82df41875402285608ef13a55df57"], "properties": ["rusty, hat, dartboard", "shape is curved, color is white, 
material is plastic"], "captions_pred_pc": ["in 15 words or less an illustration of a 3d character on a white background stock illustration", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d sculpture of a teddy bear standing on a pedestal", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a pyramid with blue and pink lines, wires, and mesh.", " a large metal building with a roof and truss structure."], "sample_ids": ["a3b2db8d5c6044f88b275839d0cd71bd", "b85a99699ccd4bcba213322113bb253d"], "properties": ["color, shape, material", "roof, truss, structure"], "captions_pred_pc": ["a black and white image of a patterned rug", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of the pyramid roof royalty-free 3d model preview no.1", "a 3d model of a long metal fence"], "question": "which entity is made of metal", "label": 1}, {"captions": [" of a green tent with a green cover and white awning.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["5168d23199604fa6b8fd982c2e2cf9e9", "a17477b445b3443189dad22f768b888b"], "properties": ["color, awning, cover", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white illustration of a pyramid made of dots", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a skateboard ramp royalty free 3d model preview no.2", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": [" a small building with stairs and a glass floor, featuring a square table and a black square ceiling light.", " a large steel building with a pool."], "sample_ids": ["8aaad713b8834739b008ccf2f3d86cce", "72eed67d8d884c819b28e2e95eb48f06"], "properties": ["floor, table, light", "building material, pool, steel"], "captions_pred_pc": ["above a black and white photograph of a window", "a black and white illustration of a 
building with a tree growing out of it"], "captions_pred_image": ["a black and white 3d model of a staircase on a platform", "a 3d model of a concrete structure"], "question": "which building has a pool", "label": 1}, {"captions": [" a robot with a blue, purple, and white body.", " a small blue robot with a windmill."], "sample_ids": ["6f98acb9e03c4cbd9c83f2c8f9cd3ddc", "5f915b161cc74fedad52d8663b41f87a"], "properties": ["body, color, white", "color, blue, robot"], "captions_pred_pc": ["above a black and white image of a robot", "a black and white image of a 3d sculpture"], "captions_pred_image": ["a 3d model of a robot standing in the middle of a white background", "a 3d model of a wind turbine"], "question": "which robot is blue?", "label": 1}, {"captions": [" of a large black mat with square grid design.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["72aac2e9ccd7482eb88e5e4bc204fbf3", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["size, color, design", "roof, color, yellow"], "captions_pred_pc": ["a black and white pattern on a white background", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a black rubber mat on a gray surface", "a 3d model of a house with a roof"], "question": "which is not a building", "label": 1}, {"captions": [" a polygonal owl, stone block, and helmet, featuring triangular details and a rock with a hole.", " a polygonal owl, stone block, and helmet, featuring triangular details and a rock with a hole."], "sample_ids": ["a78520b929314d7c8477322e000bc481", "a78520b929314d7c8477322e000bc481"], "properties": ["polygonal, helmet, triangular details", "polygonal, helmet, triangular details"], "captions_pred_pc": ["a black and white image of a piece of fabric with dots", "a black 
and white image of a piece of fabric with dots"], "captions_pred_image": ["a 3d model of a gray, triangular shaped object", "a 3d model of a gray, triangular shaped object"], "question": "which entity has a polygonal owl", "label": 0}, {"captions": [" a white and black chair with a black handle and armrest.", "a white glass beer mug."], "sample_ids": ["cf1f435c54b046f68d6603cd3369a94f", "1d686cbd3e9a4c629a43088658989286"], "properties": ["color, black, white, handle, armrest", "color, white, glass"], "captions_pred_pc": ["a black and white drawing of a geometric shape", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a 3d rendering of a white chair with a black handle", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": [" a house with a flat roof structure.", " a house with a roof and beams."], "sample_ids": ["abc52d210d71415296730bb00352ce6f", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["roof, flat, structure", "roof, beams, house"], "captions_pred_pc": ["a black and white drawing of a window with dots around it", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a building with a roof"], "question": "which house has a roof with beams", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a small wooden house."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "4cb4dba1237443eb8dc299530fa12521"], "properties": ["roof truss, insulation, suspended ceiling", "house, material, wood"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made 
of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a small cottage"], "question": "which house is made of wood", "label": 1}, {"captions": [" of a white supreme logo t-shirt, low poly design.", " a white castle composed of small cubes."], "sample_ids": ["bea8441c08d94366b96b53775391d8e6", "d0d00e27b8744e29a0a869c5ca277197"], "properties": ["color, white, logo", "composed of, white, cubes"], "captions_pred_pc": ["for a black and white image of a shirt with dots", "a black and white illustration of the letter 'k'"], "captions_pred_image": ["a 3d model of a white t-shirt with a supreme logo", "a 3d model of a castle made of white cubes"], "question": "which entity is composed of white cubes", "label": 1}, {"captions": [" of a wooden nightstand with a hexagonal pattern, white top, and drawer.", "a black and white of a knife/sword with a handle."], "sample_ids": ["e6f2dbec6d464b4da4aa47bce242f6e5", "c7692fa635e049bda0a2039fa5a784a4"], "properties": ["Drawer, Pattern, White", "image, color, black and white"], "captions_pred_pc": ["a 
cross made of dots on a white background vector illustration of a cross made of dots on a white background royalty free illustration illustration of a cross made of dots on a white background vector illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a", "of a black and white knife on a white background"], "captions_pred_image": ["a 3d rendering of a wooden box with a handle", "a black and white image of a knife"], "question": "which entity is not a color image?", "label": 1}, {"captions": ["a piece of rock with a blue stick.", " a clay pot with holes in it."], "sample_ids": ["8a8b16deb8794d4b812a6b36cc6a3cd9", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["color, rock, stick", "hole, material, clay"], "captions_pred_pc": ["a black and white illustration of dots on a white background a black and white illustration of dots on a white background stock illustration", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": 
["a black and white photograph of a rock formation", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": ["a red circle, red and white arrow, mouse, and child's handwriting.", " a large building featuring stairs, a clock tower, a balcony, and a roof."], "sample_ids": ["3c233f87bf264968a7f0660b9eac9e4a", "e7c78316f9cb4b8aad57a9c933f5278b"], "properties": ["red, mouse, handwriting", "building, roof, balcony"], "captions_pred_pc": ["in 15 words or less a black and white image of a person's hand holding a pencil and drawing on a piece of paper", "a black and white illustration of a group of dots in the shape of a square on a white background royalty free illustration"], "captions_pred_image": ["a black and white drawing of a person's hand holding a pencil", "a 3d model of a building with a clock tower"], "question": "which entity has a roof", "label": 1}, {"captions": ["a 3d rendered coffee table with a black and brown base and a square ceiling light.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["27a365f067004d9c9c58e40c12827ce0", "bded33af34104b9686b845dfd18309a9"], "properties": ["baseColor, black, brown, tableTopColor, brown, black", "table, staircase, light"], "captions_pred_pc": ["a black and white image of a patterned square on a white background", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d model of a coffee table", "a 3d model of a small table with a staircase"], "question": "which table has a staircase?", "label": 1}, {"captions": ["a 3d white rectangular box with multiple compartments and a white lid.", " a tree with green leaves."], "sample_ids": ["966f6e9324a74d90831924895d3f2e8c", "1a902b67735845198efb1b84f434b580"], "properties": ["size, color, shape", "leaf, color, green"], "captions_pred_pc": ["a black and white illustration of a patterned rug", "for a black and white drawing of a 
tree"], "captions_pred_image": ["a 3d rendering of a white cardboard box on a gray background", "a 3d model of a tree with a lot of foliage"], "question": "which entity is green", "label": 1}, {"captions": ["a pair of blue wireless earbuds with a black case.", "a pair of blue wireless earbuds with a black case."], "sample_ids": ["88d8bccf701145dea3fc4df8781ddb13", "88d8bccf701145dea3fc4df8781ddb13"], "properties": ["blue, case, earbuds", "blue, case, earbuds"], "captions_pred_pc": ["a black and white image of a pair of earrings in the shape of a butterfly", "a black and white image of a pair of earrings in the shape of a butterfly"], "captions_pred_image": ["a 3d model of a pair of earphones royalty free 3d model preview no.1", "a 3d model of a pair of earphones royalty free 3d model preview no.1"], "question": "which pair of earbuds has a black case?", "label": 0}, {"captions": [" a building featuring a window with pink bars and an orange-pink frame, pink and purple lines, and a room with a wall and a door.", "star wars stormtrooper "], "sample_ids": ["4f21fa937959487a96350af527d75421", "05678f11f4fe47178c9b4941ee334e16"], "properties": ["window, frame, room", "a, color, white"], "captions_pred_pc": ["a black and white drawing of a metal cage", "a black and white illustration of a stormtrooper"], "captions_pred_image": ["a 3d model of a wall made up of many vertical bars", "a 3d model of a star wars stormtrooper"], "question": "which entity is white", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "97e000ff41094665afd94ea565da8b13"], "properties": ["yellow, table, roof", "roof, material, wood"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white drawing of a 
floor plan"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of the roof of a building"], "question": "which roof is made of wood", "label": 1}, {"captions": ["s of a plane, house, pile of rocks, and islands, accompanied by watercolor paintings of a tree, rocks, and a boat in a sand dune.", " a concrete wall with peeling paint and rusted metal features."], "sample_ids": ["8da8da6ccf5f4011a4115977c55d1cb8", "4376831ff557462dbacc4cce88a8cc86"], "properties": ["s, plane, house", "paint, rust, concrete"], "captions_pred_pc": ["a black and white silhouette of a map", "above a black and white image of a shelf on a white background"], "captions_pred_image": ["a black and white image of a small island in the middle of a body of water", "a 3d model of a concrete wall"], "question": "which entity has more paint", "label": 1}, {"captions": [" of a black shark", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["01b98721613b4c0ea23982c048955c1f", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["black, mouth, teeth", "- material is wood, rusty, horned"], "captions_pred_pc": ["a black and white image of an object on a white background", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a black shark on a gray background royalty free 3d model preview no.2", "3d model of a plague doctor's mask"], "question": "which entity is not made of wood", "label": 1}, {"captions": [" of a house with a pink roof.", " a house with a roof and beams."], "sample_ids": ["6162909df6294848a8eea83c3aa9585b", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["color, roof, pink", "roof, beams, house"], "captions_pred_pc": ["a black and white drawing of the letter 'p' on a white background illustration", "in 15 words or less a black and white illustration of a 3d object in the 
shape of a square"], "captions_pred_image": ["a 3d model of a house in the style of the 1920s and 1930s", "a 3d model of a building with a roof"], "question": "which house has a roof with beams", "label": 1}, {"captions": ["smiley-faced banana .", " of a plague mask with a rusty, horned, wooden helmet and a crow's head design."], "sample_ids": ["cc0099a687194a31a052ac761f5fdfea", "2b0896f810074399a5ae7d6dbab8c330"], "properties": ["face is smiling, banana is yellow, smiley face is a sticker", "- material is wood, rusty, horned"], "captions_pred_pc": ["above a black and white image of a surfboard on a white background", "in 15 words or less a black and white image of a face made up of tiny dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a banana with a smiling face", "3d model of a plague doctor's mask"], "question": "which object is made of wood", "label": 1}, {"captions": ["a 3d cartoon character wearing a hat and holding various items, such as a sword, gun, broomstick, and stick.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["9863dee114264b89a95dca4c78d08424", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["hat, sword, gun", "island, terrain, water"], "captions_pred_pc": ["for a black and white illustration of a small monster holding a spoon in one hand and a cup of coffee in the other", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a police officer holding a baton and wearing a hat and helmet", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a staircase with a yellow railing, accompanied by a chair with a glass top, a square light fixture, and a kitchen hood in a small building.", " a small table with stairs, a white top, and an accompanying platform, featuring a square ceiling light fixture."], "sample_ids": 
["ddaac41b6efd4dd0b7bf8d441441555e", "6d0479f5b6d74e7ab69fad4091259e6c"], "properties": ["railing, chair, light", "light, fixture, square"], "captions_pred_pc": ["above a black and white image of a letter 'g'", "above a black and white photograph of an object in the middle of a field"], "captions_pred_image": ["a 3d model of a bench on a white background royalty free 3d model no.3", "a 3d model of a desk with a table on top"], "question": "which entity has a light fixture that is not square?", "label": 0}, {"captions": [" a black and white object, possibly a car, arrow, or light fixture.", "a white glass beer mug."], "sample_ids": ["ff2fc36f68c642b6940407bed2ee6917", "1d686cbd3e9a4c629a43088658989286"], "properties": ["color, black, white", "color, white, glass"], "captions_pred_pc": ["for a black and white image of a pair of scissors", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a 3d model of an airplane on a white background", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": [" a small black, white, and orange walkie-talkie.", " a house with a roof and beams."], "sample_ids": ["e098706085a44898abbd549693d12a64", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["color is black, white, orange", "roof, beams, house"], "captions_pred_pc": ["of a white and black clutch with a flower design", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d model of a walkie-talkie royalty free 3d model preview no 3", "a 3d model of a building with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": ["a 3d object featuring a fish with a long nose and open mouth, a cat head with red, yellow, and green stripes, a frog with a long snout, a white horse with green and yellow stripes, and a wolf mask.", " a clay pot with holes in it."], "sample_ids": ["276699bb0f974c47b4e2954cfcd1651c", 
"8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["a, color, white", "hole, material, clay"], "captions_pred_pc": ["a black and white image of a skull in the shape of a butterfly", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of an animal with a long nose", "a clay sculpture of a face with holes in it"], "question": "which entity is made of clay", "label": 1}, {"captions": [" a small white house with a staircase and a window.", " a small white building with stairs and shelves."], "sample_ids": ["9eb88d17310d42dda9e17883e9922525", "9e1f64d4fd514059be934077717536dc"], "properties": ["house, staircase, window", "building, stairs, shelves"], "captions_pred_pc": ["a black and white illustration of a person standing in the middle of a crowd of tiny black dots on a white background royalty free illustration", "a black and white image of a person standing in front of a white background"], "captions_pred_image": ["a 3d rendering of a small room with a staircase", "a white 3d model of a building with stairs"], "question": "which building has stairs and shelves?", "label": 1}, {"captions": ["a small 3d purple teapot and elephant.", " a flying bird, resembling a crow and a pigeon."], "sample_ids": ["4a27592dc8164f709b44446ee11832c0", "5ec78c8b6ab54f739adb0b46d216a454"], "properties": ["color, shape, material", "bird, resembles, crow, pigeon"], "captions_pred_pc": ["a black and white 3d illustration of an elephant's head on a white background 3d illustration of an elephant's head on a white background royalty free illustration", "above a black and white illustration of an airplane on a white background"], "captions_pred_image": ["a white ceramic teapot on a gray background", "a black and white image of a bird in flight"], "question": "which entity is a bird?", "label": 
1}, {"captions": ["s of a cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, and cow.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["7adf9de5fb734455a3a3a7f084e3d628", "a2354f13774340d392fbf33564934aab"], "properties": ["cat, toilet, white bird with black eyes, stuffed animal, man in a white shirt, paper airplane, cow", "building, roof, yellow"], "captions_pred_pc": ["a black and white image of a flying saucer", "a black and white image of a cell phone"], "captions_pred_image": ["a black and white image of a fighter plane flying upside down", "a 3d rendering of a machine with a conveyor belt"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a wooden windmill with a red roof.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["2ad8fca30285483d8b7f602fa078215d", "3cd410c4359a4cef98702963a2b9802b"], "properties": ["roof, color, red", "roof, trusses, ladder"], "captions_pred_pc": ["a white and black image of a snowflake in the shape of a snowflake", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a 3d model of a windmill on a gray background", "a 3d model of the roof of a building"], "question": "which roof is made of wood", "label": 1}, {"captions": [" of a white wall-mounted light switch, electrical outlet, and various furniture pieces.", "a white of a city with buildings and a gold spoon."], "sample_ids": ["b195bf7ba6094e1b812e4312deeeb360", "2351471a2d2145c59fec5f68ffae4816"], "properties": ["light switch, electrical outlet, furniture", "image, city, spoon"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "a black and white image of a diamond shaped piece of fabric"], "captions_pred_image": ["a 3d rendering of a room with a white background", "a 3d model of a city skyline in white"], "question": "which image shows a city with 
buildings and a gold spoon?", "label": 1}, {"captions": [" a small white bookcase-like building with stairs and a light fixture.", " a small white building featuring a room with a door and a white shelf."], "sample_ids": ["5f99eb9d1f1e4d57b5690446f832c841", "d7b78fa9a6b64f6095b881bc619b04fe"], "properties": ["building, color, white", "room, door, shelf"], "captions_pred_pc": ["in 15 words or less a black and white image of the letter 'f' made up of dots", "above a black and white illustration of a person standing in front of a door"], "captions_pred_image": ["a 3d model of a bookshelf on a white background", "a 3d model of an empty room"], "question": "which building has a room with a door and a white shelf?", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", " a potted christmas pine tree."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "460c8f3034a844159826fac3b8aa35a5"], "properties": ["texture, spikes, eyes", "a, color, green"], "captions_pred_pc": ["a black and white drawing of a flower", "a black and white illustration of a snowflake on a white background"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 
3", "a 3d model of a christmas tree in a vase"], "question": "which entity is greener", "label": 0}, {"captions": [" a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole.", " a small wooden house."], "sample_ids": ["b16fb21cda9a4a21a024df749c2304f4", "4cb4dba1237443eb8dc299530fa12521"], "properties": ["roof, ceiling, hole", "house, material, wood"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots"], "captions_pred_image": ["a 3d model of a small house and a tree in the foreground", "a 3d model of a small cottage"], "question": "which house is made of wood", "label": 1}, {"captions": ["3d silver sculpture resembling a triangular wing with a white arrow design.", " a white and gold mirror on a wooden easel stand."], "sample_ids": ["578cb910905042939d876af28e29eb2f", 
"0d10d734448d4a5d8d07b938c12d9d80"], "properties": ["wing, silver, white", "color, white, gold"], "captions_pred_pc": ["a black and white illustration of a pair of hands", "for a black and white image of a shoe on a white background"], "captions_pred_image": ["a white paper airplane flying against a gray background", "a 3d model of a standing mirror on a white background"], "question": "which object is white and gold?", "label": 1}, {"captions": [" of a japanese-style pagoda house in a pixelated village.", " a small white house with stairs, featuring a table, chair, and white box, along with a ceiling light."], "sample_ids": ["42c4e6ca4a0c4b7b9a97d543b2442222", "5ea0962b100b4fccb761ed84afe027b5"], "properties": ["image is a japanese-style pagoda house in a pixelated village", "house, table, chair"], "captions_pred_pc": ["a black and white drawing of a square on a white background stock illustration \u00a9 2019 iStock", "above a black and white photograph of an open door"], "captions_pred_image": ["a 3d model of a japanese temple and pagoda", "a 3d rendering of a small white table with a chair"], "question": "which house has a table and chair?", "label": 1}, {"captions": ["a chocolate cake with a mushroom on top, displayed on a stand.", " a brick wall with grass."], "sample_ids": ["728e8c8600584eacae80208bba7eece4", "53f2d948091f417cb580e22469c94db2"], "properties": ["chocolate, mushroom, cake", "brick, grass, wall"], "captions_pred_pc": ["a black and white image of a toilet paper holder", "above a black and white illustration of an underwater scene"], "captions_pred_image": ["a mushroom on top of a cake royalty free 3d model preview", "a black and white photo of a brick wall and a puddle"], "question": "which object is more likely to be in a garden", "label": 1}, {"captions": [" a sword with a wooden handle.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["d42eec2b8e7644ec9ededcf9f325faad", 
"c3a82df41875402285608ef13a55df57"], "properties": ["handle, material, wood", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of a knife on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a black and white image of a sword", "a white plastic object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a human foot with a red, white, and pink bone structure, including a skull with red and white details.", "a 3d low poly model of a hammer and an axe with wooden handles."], "sample_ids": ["39cdff793d3f4dcd898dd6b5222cb289", "c4e45a41478e42418399074b88d8920f"], "properties": ["color, shape, size", "axe, handle, wood"], "captions_pred_pc": ["a black and white image of an ornate design on a white background", "for a black and white image of a bird's wing"], "captions_pred_image": ["a 3d model of a human bone structure", "a 3d model of a knife on a white background"], "question": "which object has a handle made of wood", "label": 1}, {"captions": [" a small building with a roof.", " a large metal building with a roof and truss structure."], "sample_ids": ["a3089017e4c5463d852de65abf61eda5", "b85a99699ccd4bcba213322113bb253d"], "properties": ["roof, building, small", "roof, truss, structure"], "captions_pred_pc": ["in 15 words or less a black and white image of a building with dots all over it", "of a metal grate on a white background"], "captions_pred_image": ["a black and white image of a small house", "a 3d model of a long metal fence"], "question": "which building has a roof", "label": 1}, {"captions": [" a table with two bowls and a cup on it.", " a small white house with windows and a black lid."], "sample_ids": ["41842c8d2ebd402da04def3c53c41633", "4b40af369c1149949f5ccb68becd8430"], "properties": ["a, bowl, cup", "white, windows, lid"], "captions_pred_pc": ["a black and white image of two eyes in the sky", "above a black 
and white image of dots on a white background"], "captions_pred_image": ["a 3d model of a table on a marble floor royalty free 3d model preview no. 3", "a 3d model of a white house with three windows"], "question": "which object has a lid", "label": 1}, {"captions": [" a small house with a roof.", " of two rocks with ice elements."], "sample_ids": ["9578e8de15ec44ce802072aaa4df3910", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["roof, house, small", "image is a rock with ice elements"], "captions_pred_pc": ["above a black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a small house", "a 3d 
image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": ["an orange of a forklift truck in a parking lot.", " a house with a flat roof structure."], "sample_ids": ["32d757fbd29640ffb5aab34925525a29", "abc52d210d71415296730bb00352ce6f"], "properties": ["orange, forklift, parking lot", "roof, flat, structure"], "captions_pred_pc": ["a black and white illustration of a cell phone", "a black and white drawing of a window with dots around it"], "captions_pred_image": ["a black and white photograph of a forklift", "a 3d model of a house with a roof"], "question": "which structure has a flat roof", "label": 1}, {"captions": [" a destroyed house and plane amidst a town with buildings.", "a featuring a small room with a bunk bed, desk, chair, table, and a blue house."], "sample_ids": ["0fd3ddca09194b8f94ef731af3b64a08", "dd3a9323ed514ccab330973ff9588015"], "properties": ["house, plane, town", "room, bed, desk"], "captions_pred_pc": ["above a black and white drawing of a piece of paper", "a black and white drawing of a door"], "captions_pred_image": ["a 3d model of a damaged building", "a 3d model of a small room with a bunk bed"], "question": "which entity has a bed", "label": 1}, {"captions": ["a 3d-printed green plastic cylinder with a hole in the middle.", "royalty-free of a golden gramophone with a wooden base and umbrella."], "sample_ids": ["9faa0c251d394f368f4f537ea21f977f", "90bd720f583c4130a6273f5a94f6ae69"], "properties": ["color, material, shape", "image is royalty-free, gramophone, base"], "captions_pred_pc": ["a black and white image of a glittering object", "a black and white illustration of a water droplet in the shape of a snowflake"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a 3d model of an antique gramophone"], "question": "which object is made of wood", "label": 1}, {"captions": [" a small white and brown house with windows and a door.", " a small house 
with a pond and situated on a rock."], "sample_ids": ["7de99253a4bc4d98bd941e40bbad8c7a", "92859eb82a344134806b37cc209927c6"], "properties": ["color, door, window", "house, rock, pond"], "captions_pred_pc": ["of a person wearing a black and white striped shirt", "in 15 words or less a black and white drawing of a toaster"], "captions_pred_image": ["a 3d model of a police box royalty free 3d model preview no.2", "a 3d model of a house in the middle of a field"], "question": "which house has a pond?", "label": 1}, {"captions": [" of a wooden nightstand with a hexagonal pattern, white top, and drawer.", "a featuring a man, woman, robot, cat, and dog interacting in and around a house."], "sample_ids": ["e6f2dbec6d464b4da4aa47bce242f6e5", "92498f398e244020a867686729633610"], "properties": ["Drawer, Pattern, White", "a, house, cat, dog"], "captions_pred_pc": ["a cross made of dots on a white background vector illustration of a cross made of dots on a white background royalty free illustration illustration of a cross made of dots on a white background vector illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a", 
"above a black and white illustration of a dog sitting on top of a pile of rocks"], "captions_pred_image": ["a 3d rendering of a wooden box with a handle", "a 3d model of a person standing in front of a wall"], "question": "which entity has a cat and a dog?", "label": 1}, {"captions": ["a small yellow and black helicopter flying in the air.", "green toy sand bucket and shovel with a squirt gun."], "sample_ids": ["7qxP6dQ5nNuaG8d0vswXXKnd0vq", "ae173b4afc4d4b0499f1e4e55d647c06"], "properties": ["color, yellow, black", "color, green, squirt gun"], "captions_pred_pc": ["a black and white illustration of an airplane", "a bucket with a sponge and a sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in"], "captions_pred_image": ["a black and white helicopter flying in the air", "a 3d model of a bucket and shovel royalty free 3d model preview no.2"], "question": "which object is green", "label": 1}, {"captions": [" of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom.", " of a stone wall with a window and a clock."], "sample_ids": ["f178fb523ad7421aaa90a92ee736ee00", "46fc13a04c8b47fa86215b9efc2eb1f9"], "properties": ["bedroom, bathroom, bed", "window, clock, wall"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white illustration of a bullet in 
the middle of a starry sky"], "captions_pred_image": ["a 3d model of a small room with a bed, desk, and chair", "a 3d model of a brick wall"], "question": "which entity has a window", "label": 1}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue arrow, exosphere label, england label, blue and purple stripes, and a blue flag.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "4a889132cc444d10bfcbf6c760984416"], "properties": ["color, temperature, england", "a, color, white"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d model of the earth with a rainbow in the sky", "a 3d model of a desk and chair"], "question": "which entity is a white box?", "label": 1}, {"captions": ["a white of a spaceship and building.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["bf7d4277c9184d35abdec85bd5e25956", "4a889132cc444d10bfcbf6c760984416"], "properties": ["image, building, spaceship", "a, color, white"], "captions_pred_pc": ["a black and white drawing of a tree", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d model of a white object on a gray background", "a 3d model of a desk and chair"], "question": "which image has a spaceship and building?", "label": 0}, {"captions": [" of a wooden nightstand with a hexagonal pattern, white top, and drawer.", " a floating small island with trees, grass, and a mountain."], 
"sample_ids": ["e6f2dbec6d464b4da4aa47bce242f6e5", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["Drawer, Pattern, White", "island, mountain, grass"], "captions_pred_pc": ["a cross made of dots on a white background vector illustration of a cross made of dots on a white background royalty free illustration illustration of a cross made of dots on a white background vector illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a white background illustration of a cross made of dots on a", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d rendering of a wooden box with a handle", "a 3d image of a small island in the middle of a lake"], "question": "which entity has grass", "label": 1}, {"captions": [" a long row of steel shelves in a warehouse, featuring a suspended scaffolding system.", "a white glass beer mug."], "sample_ids": ["578fe7a7bd754b889be33aea99cf5050", "1d686cbd3e9a4c629a43088658989286"], "properties": ["a, material, steel", "color, white, glass"], "captions_pred_pc": ["above a black and white image of a rack with multiple shelves", "a black and white drawing of a beer mug on a white background"], "captions_pred_image": ["a 
3d model of a large metal structure", "a 3d model of a glass pitcher"], "question": "which object is made of glass", "label": 1}, {"captions": [" a white and yellow table with yellow legs and a metal roof structure.", " a house with a green, wooden-structured roof."], "sample_ids": ["a71c43af3c944bf5b6d12375f7d54811", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["color, white, yellow, roof, metal", "roof, color, green"], "captions_pred_pc": ["above a black and white drawing of a floor plan", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a 3d model of a white table with multiple cubes", "a 3d model of a house with a triangular roof"], "question": "which roof is made of wood", "label": 1}, {"captions": ["a 3d object featuring a flat roof with blue and white pattern, a purple and white bench, a blue and purple striped runner, a bed with blue and purple stripes, blue and white striped paper, a bench with a blue and purple pattern, and a ceiling light fixture with wooden slats.", " a small wooden house."], "sample_ids": ["28d0cf71ed684dcb87b2c9b2744c3633", "4cb4dba1237443eb8dc299530fa12521"], "properties": ["runner, bed, bench", "house, material, wood"], "captions_pred_pc": ["a black and white drawing of a snowflake on a white background a creative black and white drawing of a snowflake on a white background royalty free illustration", "a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background 
house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots"], "captions_pred_image": ["a 3d model of a long, curved structure", "a 3d model of a small cottage"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a white circular light with a yellow dot in the middle.", "a black motorcycle helmet with a face mask and visor."], "sample_ids": ["fa753fe490854b35be3b76450d2641e0", "ad6df43a2ce24edfb15f5bb64755ed0d"], "properties": ["color, shape, size", "color, black, visor"], "captions_pred_pc": ["above an illustration of a black circle with a white background", "a black and white circular shape made up of many small dots on a white background a black and white circular shape made up of many small dots on a white background illustration"], "captions_pred_image": ["a white circular object on a gray background", "a black motorcycle helmet with a visor on top"], "question": "which entity is black", "label": 1}, {"captions": [" an ice cream machine, popsicle with two sticks, and a shelf with a computer monitor.", "a featuring a graffiti-covered train, bench, wall, and skateboard."], "sample_ids": ["cb840159fea7436d81eb33bdccad3596", "6de9fcac063d45df9424decdc215b379"], "properties": ["A, a, a", "graffiti, bench, wall"], "captions_pred_pc": ["a black and white illustration of a bench", "for a black and white image of a boat"], "captions_pred_image": ["a 3d rendering of a white and gray wall mounted shelf", "a graffiti-covered wall in a black and white photograph"], 
"question": "which entity has graffiti on it", "label": 1}, {"captions": [" a small orange piano with legs.", " a house with wooden framing and trusses."], "sample_ids": ["d793de7c08d74414beeb8ea50f730705", "4501794e257c4a8ba60a94757d8e93a9"], "properties": ["color is orange, size is small, legs", "frame, trusses, wood"], "captions_pred_pc": ["a black and white illustration of a bench", "a black and white drawing of a window"], "captions_pred_image": ["a 3d model of a grand piano royalty free 3d model preview no 2", "a 3d model of a house under construction"], "question": "which object is made of wood", "label": 1}, {"captions": ["a 3d object collection featuring a long stick, a grassy hill, a fish, a green and yellow knife, a small green and purple fish, a bird, and a crocodile.", " of a white building with a small house and a desk with a laptop."], "sample_ids": ["856e5f6a854d4c608901f1e2b580344c", "9244a2d3a9e94c8398ef991f1661bb58"], "properties": ["a, b, c", "a, desk, laptop"], "captions_pred_pc": ["above a black and white drawing of a submarine", "a black and white image of a piece of furniture"], "captions_pred_image": ["a black and white photograph of a fish on a gray background", "a 3d model of an office desk on a white background"], "question": "which entity has a desk with a laptop?", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", " a large steel building with many columns and a pool."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "2ce649a4152a45bab60d8cafa1dcdeb3"], "properties": ["frame, roof, trusses", "building material, pool, steel"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a concrete structure"], "question": "which building material is used to build the pool", "label": 1}, {"captions": [" a black box with an open lid and an orange handle.", " a black box with 
an open lid and an orange handle."], "sample_ids": ["2a916894edd54029b325cf51f96b0904", "2a916894edd54029b325cf51f96b0904"], "properties": ["color, black, lid, open, orange, handle", "color, black, lid, open, orange, handle"], "captions_pred_pc": ["a piece of paper with black dots on a white background a piece of paper with black dots on a white background royalty free illustration", "a piece of paper with black dots on a white background a piece of paper with black dots on a white background royalty free illustration"], "captions_pred_image": ["a black and white cardboard box with an open lid", "a black and white cardboard box with an open lid"], "question": "which box has a lid that is open?", "label": 0}, {"captions": [" a small village featuring houses, trees, and a winding road.", "a pixelated-textured purple sphere ."], "sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "fb68393941804e769d5c9b372864a642"], "properties": ["houses, trees, road", "texture, color, shape"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "a black and white square made of dots on a white background"], "captions_pred_image": ["a black and white photograph of a small town", "a 3d model of a gray sphere on a white background"], "question": "which entity is a sphere?", "label": 1}, {"captions": [" a set of yellow and black tools, including a magnifying glass, hexagonal keys, and screwdrivers.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["40285a60f32749a8ae38957c7b073fe8", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["color, yellow, black", "throne, stairs, tree"], "captions_pred_pc": ["a set of three black and white keychains on a white background", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a pair of hexagonal keys and 
a pair of hexagonal scissors royalty free 3d model preview no 3", "a 3d model of a throne with a tree on it"], "question": "which entity has a tree?", "label": 1}, {"captions": [" of a metal tool with a yellow handle, a laptop, and a ceiling light fixture.", " a green mountain with trees and grass."], "sample_ids": ["b714bf13e9e54acb867c2c1b3ccf8ae8", "08fb23bdc67b4b0ba5fc64ea5c97e5f7"], "properties": ["metal, laptop, light fixture", "mountain, grass, tree"], "captions_pred_pc": ["for a black and white image of a corner shelf", "in 15 words or less a black ink blot on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a telescope on a stand", "a 3d model of a mountain with snow on it"], "question": "which entity has more grass", "label": 1}, {"captions": [" of white spheres resembling a molecule.", " of a stone fountain with a lion statue, surrounded by an archway inspired by the arch of triumph."], "sample_ids": ["9d2c94d03ca745948b8cb4e8cafddb1c", "a72700696c3b44ef8101d1e71e914bc9"], "properties": ["color, shape, number", "lion, statue, fountain"], "captions_pred_pc": ["of a black and white 3d model of a molecule on a white background a black and white 3d model of a molecule on a white background royalty free illustration", "a black and white image of a metal object"], "captions_pred_image": ["a 3d sculpture of a white ball on a gray background", "a 3d image of a lion statue on a wall"], "question": "which entity is a statue?", "label": 1}, {"captions": [" a large bridge over a highway, with an airport and train station nearby.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["b348fddc913f47df93cf35db302427d0", "74d87b126e9d4d65839f4117de835ca7"], "properties": ["location, highway, train station", "lizard, rock, stuffed animal"], "captions_pred_pc": ["a black and white photo of a small square on a white background", "a black 
and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["an aerial view of a city with a highway in the foreground and buildings in the background", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a lizard on a rock?", "label": 1}, {"captions": [" of a green man with arms outstretched, appearing to fly.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["34850e40521940e49cfc27f0f486f544", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, shape, size", "island, terrain, water"], "captions_pred_pc": ["a black and white illustration of an airplane propeller", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a man with his arms outstretched royalty free 3d model no.2", "a 3d image of a small island in the middle of a lake"], "question": "which entity is a small island?", "label": 1}, {"captions": [" a futuristic space station featuring a bench, computer desk with a laptop, small coffee machine, printer, and computer monitor.", "a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky."], "sample_ids": ["9b8e2f9070b24956a343a01a5fabdf03", "8557a15b9f244d2cbf16786dbc8b7b25"], "properties": ["computer desk, laptop, monitor", "building, room, sky"], "captions_pred_pc": ["a black and white silhouette of a traditional japanese gate", "above a black and white image of a person's hand holding a paintbrush"], "captions_pred_image": ["a 3d model of a futuristic room with a bench", "a 3d rendering of white clouds floating in the air"], "question": "which entity has a room?", "label": 1}, {"captions": [" of a black and white coffee table with a laptop, featuring a performance studio and ceiling grid structure.", " a small house with a tree and a tree 
stump, featuring a roof and a square ceiling with a hole."], "sample_ids": ["a177693cc8c7428292680816001b48c6", "b16fb21cda9a4a21a024df749c2304f4"], "properties": ["black, white, laptop", "roof, ceiling, hole"], "captions_pred_pc": ["a black and white drawing of a room with dots on the floor", "a black and white image of a square with dots on it"], "captions_pred_image": ["a dishwasher with a dish inside it", "a 3d model of a small house and a tree in the foreground"], "question": "which entity has a ceiling with a hole", "label": 1}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue arrow, exosphere label, england label, blue and purple stripes, and a blue flag.", " a small triangular-shaped object."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "2d02985030804209a26c2c53b96a06f9"], "properties": ["color, temperature, england", "shape, triangle, small"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "a black and white image of a piece of metal"], "captions_pred_image": ["a 3d model of the earth with a rainbow in the sky", "a black piece of furniture on a white background"], "question": "which object is a triangle?", "label": 1}, {"captions": [" of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse.", " a small white house with stairs and a spiral staircase, featuring a white table and ceiling light."], "sample_ids": ["9b2c93d651c3409096118c5ce5b993f2", "e9e1cc7fae22458197a61f43a9c355f4"], "properties": ["house, roof, blue", "house, staircase, table"], "captions_pred_pc": ["a black and white illustration of a coffee mug on a white background royalty free illustration", "above a black and white photograph of a dog in a frame"], "captions_pred_image": ["a 3d model of a small house and barn", "a 3d model of a small house with a spiral staircase"], "question": "which house has a spiral staircase?", "label": 1}, {"captions": 
["a 3d blue star featuring various text, including \"prchen,\" \"bible chen,\" \"rib chicken,\" and \"birch chen.\"", " a colorful, wire-framed building structure resembling a cube."], "sample_ids": ["0b712fc68ad44ce8a33592d2b26aac18", "62b7c7c684044d998fee9ff35beeb79b"], "properties": ["color, shape, text", "color, frame, shape"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "a black and white illustration of a building made up of dots"], "captions_pred_image": ["a white paper airplane flying over a gray background", "a 3d model of a building structure"], "question": "which entity is a cube?", "label": 1}, {"captions": [" a castle on an island with a small floating house, trees, and clouds.", "a low poly of a plant on a white object, resembling a paper or plastic bag."], "sample_ids": ["c4c09479570943e2845fbd4c6a450568", "d49d8ed244094349a99e4faca05e0690"], "properties": ["castle, island, house", "low poly, plant, white"], "captions_pred_pc": ["above a black and white illustration of a group of dots in the shape of a circle", "a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small house on an island", "a 3d model of a plant growing out of a rock"], "question": "which object is white", "label": 1}, {"captions": ["a featuring a sailboat with a hook, a bird suspended in the air, and a pair of scissors.", " of two rocks with ice elements."], "sample_ids": ["f57ae66555d34349aeadc38b33f8f267", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["a, bird, hook", "image is a rock with ice elements"], "captions_pred_pc": ["of a 3d scan of a person's torso and limbs", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a black and white photo of a kite flying in the sky", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with 
ice elements?", "label": 1}, {"captions": ["a featuring a white and blue structure with a table, blue blocks, and suspended blue cubes.", " a table with pillar-like yellow and white poles, featuring a wooden structure, large tent, glass roof, and a chandelier with numerous glass tubes."], "sample_ids": ["1d2cfe3a03004b62b17d3ce065658302", "fa06167d83e54b05bdfbeeae2ca7c8a6"], "properties": ["color, table, blocks", "table, structure, roof"], "captions_pred_pc": ["a group of people standing on top of each other on a white background a group of people standing on top of each other on a white background royalty free illustration", "a black and white image of a map with dots"], "captions_pred_image": ["a 3d model of a table with four legs", "a 3d model of a building with many pillars"], "question": "which entity has a table with pillar-like yellow and white poles?", "label": 1}, {"captions": [" a red \"welcome to northwich\" billboard on a wooden base.", " of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom."], "sample_ids": ["225e4094141d416faba7c5598dc55ff8", "f178fb523ad7421aaa90a92ee736ee00"], "properties": ["base material is wood, color is red, message is welcome to northwich", "bedroom, bathroom, bed"], "captions_pred_pc": ["a black and white illustration of a circular object with many small dots on it", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a chalkboard on a pedestal royalty free 3d model preview no.2", "a 3d model of a small room with a bed, desk, and chair"], "question": "which object has a bathroom", "label": 1}, {"captions": [" a house featuring a roof with truss system, framing, insulation, and a ceiling light.", " a large house with a roof on a platform."], "sample_ids": ["39876e69e3914d99a07e0dc59611c5c0", "cb3e09a301b746918a682a595037c7f7"], "properties": ["roof, truss system, framing", "roof, platform, house"], "captions_pred_pc": ["a black and 
white drawing of a window with dots all over it", "a black and white image of a piece of paper"], "captions_pred_image": ["a 3d model of the roof of a house", "a 3d model of a small house"], "question": "which entity has a roof on a platform?", "label": 1}, {"captions": [" a black table with a metal shelf and folding door, featuring a hanging metal shelf from the ceiling.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["c7fdb8d8f32b415cb5d2a9b41dd7d77b", "06a1c233fb444830b577aa06e2c01294"], "properties": ["Black, Shelf, Metal", "house, tree, hill"], "captions_pred_pc": ["a black and white drawing of a spiral staircase", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of a black shelf with square tiles", "a black and white image of a house in the middle of a field"], "question": "which entity has a house on a hill?", "label": 1}, {"captions": ["a yellow and blue toy submarine.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["da2719db8f4f4668af5b74c96e80c6cd", "c3a82df41875402285608ef13a55df57"], "properties": ["color, yellow, blue", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["of a black and white image of a starfish on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a submarine toy", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a house featuring a pink-purple roof with trusses and wooden ceiling beams.", " of a crab holding a rubik's cube."], "sample_ids": ["b6b6a3f82bdd47c3afaf9af885ba8703", "d351e74340e14ef09fb24b69dd4a6502"], "properties": ["roof, trusses, beams", "image is a crab holding a rubik's cube"], "captions_pred_pc": ["a black and white pattern of dots in the shape of a square royalty free illustration", "a black and white drawing of a spider on a 
white background"], "captions_pred_image": ["a 3d model of the roof of a building", "a black and white image of a crab holding a cube"], "question": "which entity is a picture of a crab holding a rubik's cube?", "label": 1}, {"captions": [" a building with blue lines and structure.", " a building with blue metal framing and structure."], "sample_ids": ["3bf337b699664ca0adf0817962d58718", "ce40210c2a7e49dfaebbd934ccec4eca"], "properties": ["color, shape, structure", "color, blue, structure"], "captions_pred_pc": ["a black and white drawing of a floor plan", "a black and white image of dots on a white background"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d model of a building under construction"], "question": "which building has a blue metal framing and structure?", "label": 1}, {"captions": [" a house with a pink roof, brick walls, and insulated ceiling.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["c8936ace72954650b4e2d84246964849", "a17477b445b3443189dad22f768b888b"], "properties": ["roof, color, pink", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white drawing of a toilet", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a small building with a balcony"], "question": "which house has a roof with a pillar?", "label": 1}, {"captions": [" of a wall featuring various potted plants.", " of a wall featuring various potted plants."], "sample_ids": ["54d03259a316434e9c670a52a7e5a92e", "54d03259a316434e9c670a52a7e5a92e"], "properties": ["image, wall, plants", "image, wall, plants"], "captions_pred_pc": ["a black and white image of a knife on a white background", "a black and white image of a knife on a white background"], "captions_pred_image": ["a black and white image of a wall with bullet holes", "a black and white image of a wall with bullet holes"], "question": "which image shows a wall featuring various potted 
plants?", "label": 0}, {"captions": ["a featuring a skeleton, torn paper, long stick, rock, and broken wood.", "a 3d cartoon character of a boy wearing a cowboy hat and blue pants."], "sample_ids": ["46903bf029934b1989bc062dcb0a5531", "e94fb39e384f47ce87cce1930851b8f8"], "properties": ["skeleton, torn, paper, long stick, rock, broken wood", "hat, pants, shirt"], "captions_pred_pc": ["a close up of a black object on a white background", "a square made of dots on a white background a square made of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d sculpture of a person's hand in the air royalty-free 3d model preview", "a 3d model of a cartoon character wearing a hat"], "question": "which entity has a hat", "label": 1}, {"captions": ["a white container with a lid, labeled \"real vanilla,\" containing white powder, resembling a yogurt or raw ice cream container with a logo.", " a small, rocky island with diverse terrain and scattered rocks."], "sample_ids": ["ecd1d75076b641b382b670987e8a55ab", "09f2cf267e954c958828325067bcc36a"], "properties": ["container, powder, white", "island, terrain, rocks"], "captions_pred_pc": ["in 15 words a black and white image of dots on a white background a black and white image of dots on a white background royalty free illustration", "above a black and white photo of a small island in the middle of a body of water"], "captions_pred_image": ["a clear plastic container with a white label on it", "a black and white image of a piece of dirt on the ground"], "question": "which entity has more rocks", "label": 1}, {"captions": [" a stone wall featuring a statue and a window.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["09ab58e01e0d4393bc1e82f157e641be", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["a, window, statue", "island, terrain, water"], "captions_pred_pc": ["a black and white image of dots on a white background", "a black and white photograph of a piece of 
paper with dots on it"], "captions_pred_image": ["a black and white photo of a sculpture of jesus on the cross", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": ["a featuring a large flying ship, a mountain range with a central lake, and a small island resembling hawaii.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["4d613d2057454e719bcae7f8cf05210a", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["a, island, resembles, hawaii", "color is white, yellow, plastic"], "captions_pred_pc": ["above a black and white image of a person's hand holding a pencil", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a landscape in black and white 3d model of a", "a white plastic container with a label on it"], "question": "which object is made of plastic", "label": 1}, {"captions": ["a featuring a tree stump, mossy wood, leaves, and a rock with grass.", "a featuring a man, woman, robot, cat, 
and dog interacting in and around a house."], "sample_ids": ["2527cd763a1a43f9870eb65e44e79f7d", "92498f398e244020a867686729633610"], "properties": ["mossy, rock, grass", "a, house, cat, dog"], "captions_pred_pc": ["a black and white image of a person on a skateboard", "above a black and white illustration of a dog sitting on top of a pile of rocks"], "captions_pred_image": ["a 3d model of a piece of wood on a white background", "a 3d model of a person standing in front of a wall"], "question": "which entity is a photograph of a house?", "label": 1}, {"captions": ["a white 3d object featuring black and white patterns, resembling a combination of a dish, smoke detector, cake, bowl, alarm clock, and ceiling light.", " of a white plastic tube with a hole and a chip on it."], "sample_ids": ["6a8cc820f00a4cfc954d56e2b1f6206a", "9968e06a62e8487ea33460e640abc573"], "properties": ["- material is plastic- color is white- shape is cylinder", "color is white, material is plastic, shape is tube"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a plate", "a black and white image of a broom on a stand"], "captions_pred_image": ["a cake with a black and white design on the top of the cake", "a white object on a gray background"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a red and blue metal-framed building structure with a steel house frame.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["a453ac83ef1e4f76bd7978451888d5f5", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["color, material, frame", "house, roof, wooden"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a metal frame structure", "a black and white photograph of a birdhouse"], "question": "which house is made of wood", "label": 1}, {"captions": [" a small white building 
featuring a room with a door and a white shelf.", " a small house with a roof."], "sample_ids": ["d7b78fa9a6b64f6095b881bc619b04fe", "0d2246e433ce4066b76489f17ba8d694"], "properties": ["room, door, shelf", "roof, house, small"], "captions_pred_pc": ["above a black and white illustration of a person standing in front of a door", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a 3d model of an empty room", "a 3d model of a house with a triangular roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a row of houses featuring roof structures with green roofs and tiled roof slats.", " an african figurine with a bow and arrow, standing on a wooden base, wearing a hat."], "sample_ids": ["aef9b23a78a7450286a961cc13448d00", "d81d13362ae04371bb2cba46e4939665"], "properties": ["roof, green, tiled", "hat, bow, arrow"], "captions_pred_pc": ["of a black and white photo of a decorative metal wall hanging", "above a black and white photo of an ice sculpture"], "captions_pred_image": ["a 3d model of a set of stainless steel shelves", "a sculpture of an african man sitting on a pedestal"], "question": "which entity has a hat?", "label": 1}, {"captions": [" of a wooden truck with a load of snow and crates, driving on a pile of wood.", " a house with a green, wooden-structured roof."], "sample_ids": ["ac1e2dcbe71945d58204f95d16d658b1", "d58bb529b1434809a64b62f1b2899c3f"], "properties": ["load, crates, snow", "roof, color, green"], "captions_pred_pc": ["above a 3d image of a bench with a white background", "a black and white image of the letter 'l' made up of dots on a white background"], "captions_pred_image": ["a truck with a pile of snow on top of it", "a 3d model of a house with a triangular roof"], "question": "which entity has a roof that is green?", "label": 1}, {"captions": ["a low poly of a deer, antelope, llama, capybara, and kangaroo.", " a suitcase on wheels, featuring a bird, a laptop, a robot, and a 
vacuum cleaner."], "sample_ids": ["8b4c2e3e76524d85a9395ea1169d953e", "e8100bef7b8a48d4ac79684bffb349ba"], "properties": ["low poly, llama, kangaroo", "Wheels, laptop, robot"], "captions_pred_pc": ["above a black and white image of an animal sculpture", "a black and white drawing of a cell phone"], "captions_pred_image": ["a 3d low poly animal standing on its hind legs", "a black and white image of a suitcase on wheels"], "question": "which entity has a laptop?", "label": 1}, {"captions": ["white of a rhino head with horns.", " of two rocks with ice elements."], "sample_ids": ["8481aade84de47cab1a9accf8067e678", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["image, rhino, head", "image is a rock with ice elements"], "captions_pred_pc": ["of a penguin skull in black and white", "a black and white image of two rocks on a white background"], "captions_pred_image": ["rhino head 3d model royalty free 3d model preview no 3", "a 3d image of two rocks on a gray surface"], "question": "which image is of a rock with ice elements?", "label": 1}, {"captions": [" of a small building with camouflage pattern in a field.", " a large steel building with a pool."], "sample_ids": ["0c75e4ce947a4c25ad1b7e5cb4e7a9a1", "72eed67d8d884c819b28e2e95eb48f06"], "properties": ["building, pattern, field", "building material, pool, steel"], "captions_pred_pc": ["in 15 or fewer words a black and white image of a couch", "a black and white illustration of a building with a tree growing out of it"], "captions_pred_image": ["a 3d model of a small building in the middle of a desert", "a 3d model of a concrete structure"], "question": "which building is made of steel", "label": 1}, {"captions": [" a bridge structure with a pier, stairway, railings, and red and green elements.", "red swivel chair with a white base ."], "sample_ids": ["23701a9674204ea881fb8af9914b1924", "7b78fb47a2684906bcc22ac6e848999a"], "properties": ["color, pier, railings", "color, red, base, white"], "captions_pred_pc": ["a 
black and white illustration of a computer keyboard", "a black and white image of a sphere made up of many small dots on a white background"], "captions_pred_image": ["a 3d model of a staircase in the middle of a field", "a 3d model of a modern chair"], "question": "which entity has a white base", "label": 1}, {"captions": ["a featuring a train, large ship, long metal pipe, boat, black map of kenya, and a black piece of plastic.", " a house with roof trusses and wooden beams on a suspended ceiling."], "sample_ids": ["49c5a9d42ba64fb2b681ef583d700b98", "3c2e3a3670b042069bd8290e2c357702"], "properties": ["a train, a ship, a boat", "roof trusses, beams, suspended ceiling"], "captions_pred_pc": ["above a black and white image of a long, curved line on a white background", "above a black and white drawing of a building"], "captions_pred_image": ["a 3d model of a submarine", "a 3d model of a house with a roof in progress royalty free 3d model preview no. 1"], "question": "which entity has a roof truss?", "label": 1}, {"captions": [" a destroyed house and plane amidst a town with buildings.", " a small, snow-covered house."], "sample_ids": ["0fd3ddca09194b8f94ef731af3b64a08", "0d00d10b90134dbe9ce7b2b3d6669237"], "properties": ["house, plane, town", "house, snow, cover"], "captions_pred_pc": ["above a black and white drawing of a piece of paper", "in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a damaged building", "a piece of broken glass on a white background"], "question": "which house is covered in snow", "label": 1}, {"captions": ["a featuring a metal refrigerator, a graffiti-covered sink, and a metal toilet, all with rusted elements.", " various vehicles, including a car, truck, monster truck, and military vehicle, along with a stuffed animal and a lizard on a rock."], "sample_ids": ["e65de9c4ec9242679a45e74733f7d61d", "74d87b126e9d4d65839f4117de835ca7"], 
"properties": ["rusty, sink, graffiti", "lizard, rock, stuffed animal"], "captions_pred_pc": ["a black and white drawing of a room with a door and a rug on the floor a black and white drawing of a room with a door and a rug on the floor royalty free illustration", "a black and white drawing of a tricycle on a white background a black and white drawing of a tricycle on a white background royalty free illustration"], "captions_pred_image": ["a stainless steel toilet bowl on a pedestal", "a 3d model of a vehicle with wheels and tires"], "question": "which entity has a stuffed animal?", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", " of a white human skull with broken bone elements."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "6ca0f91b85464d7a845b3977351dd0b5"], "properties": ["color, material, structure", "color, white, skull"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white image of a cat's x-ray"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d model of a human skull in white"], "question": "which entity is white", "label": 1}, {"captions": ["an orange and white striped rocket model.", "a white of a woman with her arms outstretched."], "sample_ids": ["9f19d5d47d174d3382c7dc31aaf22f0b", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["color, orange, white", "image, color, white"], "captions_pred_pc": ["a black and white drawing of a tree on a white background vector illustration of a black and white drawing of a tree on a white background, isolated on a white background illustration of a black and white drawing of a tree on a white background, isolated on a white background illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, 
isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a tree on a white background, isolated on a white background, vector illustration of a black and white drawing of a", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a 3d model of a rocket on a gray background", "a 3d model of a woman with her arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": [" a wooden shed with a gray roof.", " a small house on an island with a boat and a bird on a rock."], "sample_ids": ["8b32e1ded62144768cd9ca8945fa8524", "1e56a92a0ddc41e59694bd1ad1656149"], "properties": ["roof, color, gray", "house, rock, bird"], "captions_pred_pc": ["a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background royalty free illustration", "a black and white drawing of a boat in the middle of a body of water"], "captions_pred_image": ["a 3d model of a shed with a gray roof", "a 3d rendering of a house on a rock"], "question": "which entity has a bird on a rock?", "label": 1}, {"captions": [" a small house on an island with trees, shrubs, a pool, and a lake.", " a small building with windows and a roof."], "sample_ids": ["c8331489fca44685bedfa1bdadf6ccb3", "0ef2cac27e364c0687afae7ab5040cc3"], "properties": ["house, lake, pool", "roof, 
windows, building"], "captions_pred_pc": ["a black and white image of a pattern on a piece of paper", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a 3d model of a large building", "a 3d model of an apartment building royalty free 3d model preview no 3"], "question": "which entity has a roof", "label": 1}, {"captions": [" a white dragon with red horns and eyes.", "a yellow and blue toy submarine."], "sample_ids": ["b7efa835186c4084b4d6ca2479af78fb", "da2719db8f4f4668af5b74c96e80c6cd"], "properties": ["color, horns, eyes", "color, yellow, blue"], "captions_pred_pc": ["above a black and white image of a sculpture made up of dots", "of a black and white image of a starfish on a white background"], "captions_pred_image": ["a 3d model of a goat's head with horns", "a 3d model of a submarine toy"], "question": "which entity is a toy?", "label": 1}, {"captions": ["a 3d white cube with windows resembling a building.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["4a07a5293f024bb0a353954a056ef626", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["- material is white- color is white- texture is textured", "roof, trusses, beams"], "captions_pred_pc": ["a black and white image of a square made up of small dots", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a 3d model of a cube", "a 3d model of the roof of a building"], "question": "which entity has a roof", "label": 1}, {"captions": [" a small house with a roof.", " a wooden shed with a gray roof."], "sample_ids": ["0d2246e433ce4066b76489f17ba8d694", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["roof, house, small", "roof, color, gray"], "captions_pred_pc": ["a black and white square made up of small dots on a white background", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window 
in the shape of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house with a triangular roof", "a 3d model of a shed with a gray roof"], "question": "which roof is gray", "label": 1}, {"captions": [" of a small island featuring a white lighthouse, a fountain, and a grassy crater.", " a small white building featuring a room with a door and a white shelf."], "sample_ids": ["2a30e69498ff4fd1a33c1fb72286f553", "d7b78fa9a6b64f6095b881bc619b04fe"], "properties": ["lighthouse, fountain, crater", "room, door, shelf"], "captions_pred_pc": ["a black beanie with sparkles on a white background", "above a black and white illustration of a person standing in front of a door"], "captions_pred_image": ["a black and white image of an object on top of a pedestal", "a 3d model of an empty room"], "question": "which entity has a door?", "label": 1}, {"captions": [" of a white plastic bar, resembling a shelf or towel rod, with a metal light fixture.", " of a large axe with a cross on top."], "sample_ids": ["f84dec547a3d4710b48db11fc9fa489a", "4b6734945e204158b076a429a30ce2e9"], "properties": ["light source, fixture, color", "axe, cross, top"], "captions_pred_pc": ["a black and white photo of a small square on a white background", "for an axe on a white background"], "captions_pred_image": ["a 3d rendering of a white object on a gray background", "a black and white image of an axe on a gray background"], "question": "which object has a cross on top", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", " a small village featuring houses, trees, and a winding road."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "7acf46c0265d4e39b97ac084852abde8"], "properties": ["texture, spikes, eyes", "houses, trees, road"], "captions_pred_pc": ["a black and white drawing of a flower", "in 15 words or less a black and white photo of a mountain landscape"], "captions_pred_image": ["a 3d model of an 
ice cube on a white background royalty-free 3d model preview no. 3", "a black and white photograph of a small town"], "question": "which entity has a road?", "label": 1}, {"captions": [" a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["a2354f13774340d392fbf33564934aab", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["building, roof, yellow", "house, roof, wooden"], "captions_pred_pc": ["a black and white image of a cell phone", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d rendering of a machine with a conveyor belt", "a black and white photograph of a birdhouse"], "question": "which building has a wooden roof", "label": 1}, {"captions": [" a white rocking chair with a curved backrest.", " of a black and white striped fly in the air."], "sample_ids": ["ee0deb90abf943b6894cd5ded1331213", "77e2b8c9032945248951e06bc7e209e0"], "properties": ["backrest, curved, yes", "black, white, fly"], "captions_pred_pc": ["a black and white illustration of a spiral pattern on a white background a black and white illustration of a spiral pattern on a white background royalty free illustration", "for a silhouette of a butterfly on a white background"], "captions_pred_image": ["a 3d model of a white chair royalty free 3d model no. 
3", "a black and white photograph of a fly"], "question": "which entity is a fly", "label": 1}, {"captions": [" of a wooden staircase with marble floor and wooden railings in a house.", " a wooden staircase and small table in a room with wooden floor and ceiling."], "sample_ids": ["ee70964ce5e841bd87381cff40d59b88", "c6b89316941b4d7c9562cd2ec0bf4706"], "properties": ["floor, staircase, railings", "floor, staircase, table"], "captions_pred_pc": ["a black and white drawing of a light switch", "above a black and white photograph of an object on a white background"], "captions_pred_image": ["a 3d model of a staircase on a marble surface", "a 3d model of a boat on a wooden surface royalty free 3d model no.3"], "question": "which staircase is in a room with wooden floor and ceiling?", "label": 1}, {"captions": [" of a house with a pink roof.", " a house with a blue roof."], "sample_ids": ["6162909df6294848a8eea83c3aa9585b", "8ff693cd3ca74f8a901ca259b8b3a7ac"], "properties": ["color, roof, pink", "roof, color, blue"], "captions_pred_pc": ["a black and white drawing of the letter 'p' on a white background illustration", "a black and white drawing of a cross on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house in the style of the 1920s and 1930s", "a 3d model of a house with a roof"], "question": "which roof is blue", "label": 1}, {"captions": [" a black table with a metal shelf and folding door, featuring a hanging metal shelf from the ceiling.", " a futuristic, small spaceship with two propellers, resembling a jet fighter."], "sample_ids": ["c7fdb8d8f32b415cb5d2a9b41dd7d77b", "832a022cdcc74763b0571e04af4e592b"], "properties": ["Black, Shelf, Metal", "resembles a jet fighter, futuristic, small"], "captions_pred_pc": ["a black and white drawing of a spiral staircase", "a black and white illustration of a person's hand holding a pencil"], "captions_pred_image": ["a 3d model of a black shelf with square tiles", "a lego model of a 
futuristic airplane"], "question": "which entity is not a spaceship?", "label": 0}, {"captions": [" a small rocky island with a brown and white rug featuring a puddle on it.", " of a spider-man mask and head."], "sample_ids": ["4806b382466247ad9265fc8240a22d3d", "bd48915cfb41409aad6dd58e157876b2"], "properties": ["rocky, rug, puddle", "a, head, mask"], "captions_pred_pc": ["a black and white image of a long, curved line on a white background", "above a black and white image of a person's head"], "captions_pred_image": ["a 3d image of a rug on the ground, with a small puddle in the center royalty free 3d model preview no. 1", "a black spider-man mask on a gray background royalty-free 3d model preview no.2"], "question": "which object is not a head?", "label": 0}, {"captions": ["a white of a woman with her arms outstretched.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["2cbfaa4fb2d84a6f94e67b4fd6e2f26f", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["image, color, white", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white silhouette of a person on a skateboard", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a woman with her arms outstretched", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which image is black and white?", "label": 1}, {"captions": [" of a wooden cabinet with drawers and filing features.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["abbc90bbd5474f73b16482ccd10e07ec", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["Cabinet, Drawers, Filing", "color is white, yellow, plastic"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a 3d rendering of a white wooden chest of drawers", "a white plastic 
container with a label on it"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a window with a colorful frame.", " a window with a colorful frame."], "sample_ids": ["59c7c35a04ce42dbb24e8f8e882c4127", "59c7c35a04ce42dbb24e8f8e882c4127"], "properties": ["frame, color, window", "frame, color, window"], "captions_pred_pc": ["a black and white image of a banner on a white background", "a black and white image of a banner on a white background"], "captions_pred_image": ["a 3d rendering of a window in a wall", "a 3d rendering of a window in a wall"], "question": "which window has a colorful frame", "label": 1}, {"captions": [" a white staircase with a black railing in a room.", " a small white building featuring a room with a door and a white shelf."], "sample_ids": ["d05c1b6047e145a4906c467a9ebe7430", "d7b78fa9a6b64f6095b881bc619b04fe"], "properties": ["color, white, railing, black", "room, door, shelf"], "captions_pred_pc": ["above a black and white image of a computer screen", "above a black and white illustration of a person standing in front of a door"], "captions_pred_image": ["a 3d rendering of a white staircase with a stainless steel handrail", "a 3d model of an empty room"], "question": "which room has a white door and a white shelf?", "label": 1}, {"captions": ["a featuring a small island with a mountain, a lake in the middle, and a bird flying above.", "a featuring a room with a staircase, a damaged bus and rv, two houses, a large white box, and destroyed buildings with debris."], "sample_ids": ["e51fead89ae64968b2ca7f4ccb6d3b97", "a47dcf3d3cf34c58af17b6715d6f1232"], "properties": ["a, bird, lake", "room, staircase, bus"], "captions_pred_pc": ["a black and white photograph of a cloudy sky", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a mountain range in black and white", "a 3d image of a building with a lot of debris"], "question": "which entity has a bus?", "label": 1}, {"captions": [" 
a woman holding a blue balloon near a graffiti-covered wall.", "s of a laptop, printer, building, and small bathroom, along with a blue-lit box, white and blue truck, and hp color inkjet cartridge."], "sample_ids": ["eead0dd97023415da05c97df37337473", "747110c073314ee39ef2f4a8d63222da"], "properties": ["a, balloon, color, blue", "s, laptop, printer"], "captions_pred_pc": ["for a black and white photo of a person standing on the edge of a cliff", "a black and white image of a pair of scissors"], "captions_pred_image": ["a 3d rendering of a sculpture on a wall with a person in the foreground and the sculpture in the background royalty-free 3d model preview no. 3", "a black and white image of an electronic device"], "question": "which entity is not a balloon?", "label": 0}, {"captions": [" a red and blue metal-framed building structure with a steel house frame.", " a small wooden house with a green roof."], "sample_ids": ["a453ac83ef1e4f76bd7978451888d5f5", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["color, material, frame", "roof, color, green"], "captions_pred_pc": ["of a black and white photo of a bike on a white background", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a metal frame structure", "a 3d model of a house with a ladder"], "question": "which house has a green roof", "label": 1}, {"captions": [" a white lighthouse building with a clock tower, rocket, and ladder.", "a low poly of a plant on a white object, resembling a paper or plastic bag."], "sample_ids": ["88eba412c78a4ced89eb857327653f6c", "d49d8ed244094349a99e4faca05e0690"], "properties": ["lighthouse, building, tower", "low poly, plant, white"], "captions_pred_pc": ["a black and white illustration of a water droplet on a white background", "a black and white illustration of a dandelion on a white background a black and white illustration of a dandelion on a white background royalty free illustration"], "captions_pred_image": ["a 3d model 
of a white sofa on a white background", "a 3d model of a plant growing out of a rock"], "question": "which object is white", "label": 1}, {"captions": [" a building or house with a roof and floor plan, resembling a pyramid with a flat roof.", " a small white building with a door, resembling a box-shaped house."], "sample_ids": ["7a91292e1ed64e60a1bbbb499209a0df", "1b5fe88d0ff149ae9d8b4eb455c5c90c"], "properties": ["apse, roof, floor plan", "shape is box, color is white, door is present"], "captions_pred_pc": ["a black and white drawing of a room", "a black and white image of a person standing in front of a white background"], "captions_pred_image": ["a 3d model of a building in the shape of a pyramid", "a 3d model of a white, open shelving unit"], "question": "which building is a box shape", "label": 1}, {"captions": ["a 3d object featuring elements of a green and purple lamp, a cartoon character with a light bulb, an ice cream cone, a syringe, a light pole, a pen with a face, and a doctor's stethoscope.", " a small building with windows and a roof."], "sample_ids": ["42d3657ce7bc4b5dbeb7f444e089a715", "0ef2cac27e364c0687afae7ab5040cc3"], "properties": ["a lamp, a syringe, a light pole", "roof, windows, building"], "captions_pred_pc": ["a spoon with black dots on a white background spoon with black dots on a white background, 3d illustration royalty free stock photo", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a 
lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop on a white background 3d model of a lollipop", "a 3d model of an apartment building royalty free 3d model preview no 3"], "question": "which entity has a roof", "label": 1}, {"captions": [" of a bearded man wearing a green shirt and a hat.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["1e4e5e8133ae48c797facaec724c13a5", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["hat, shirt, bearded", "camera, speaker, ceiling fan"], "captions_pred_pc": ["of a black and white bracelet on a white background", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a man with a beard", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a ceiling fan?", "label": 1}, {"captions": ["a featuring a pile of food, leaves, shredded paper, and rocks with scattered broken paper pieces.", " a clear glass table with metal legs and balls on top."], "sample_ids": ["5206d4d96c2d428b9c1f7ee0e13bcffb", "7c2bfa826f274377ac21f48d510848c3"], "properties": ["food, leaves, shredded paper, rocks", "glass, metal, balls"], "captions_pred_pc": ["a black and white image of a bird in flight", "a black and white image of a wine glass"], "captions_pred_image": ["a 3d model of a mountain range on a white surface", "a clear acrylic foosball table"], "question": "which object is made of glass?", "label": 1}, {"captions": [" a toy motorcycle, car, and robot on an orange platform.", " of two rocks with ice elements."], "sample_ids": ["7407a108e0354925b83b750339bc03df", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["platform, color, orange", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white illustration of a 
bicycle", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a motorcycle on a pedestal", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": ["a 3d white object resembling a sphere, frog, egg, hat, and shell.", "red heart-shaped object."], "sample_ids": ["0a8e0b95d8ce43ee9159ad01d925aad8", "a848538c7e4249a4af8e86c477193fa1"], "properties": ["shape is sphere, color is white, material is plastic", "shape, heart, red"], "captions_pred_pc": ["a black and white illustration of a sponge in the shape of a sponge sponge black and white illustration of a sponge in the shape of a sponge royalty free illustration", "a black and white image of a skateboard with dots"], "captions_pred_image": ["a 3d sculpture of an apple on a white background", "a 3d model of a heart shaped object"], "question": "which object is red?", "label": 1}, {"captions": [" of a black flat screen lcd monitor on a stand.", " of a stone wall with a window and a clock."], "sample_ids": ["0cc63371c12344e892d1c7be5a9eb782", "46fc13a04c8b47fa86215b9efc2eb1f9"], "properties": ["screen, monitor, stand", "window, clock, wall"], "captions_pred_pc": ["a black and white close-up of a television on a stand", "a black and white illustration of a bullet in the middle of a starry sky"], "captions_pred_image": ["a 3d model of a computer monitor royalty free 3d model preview no. 
2", "a 3d model of a brick wall"], "question": "which object has a clock on it?", "label": 1}, {"captions": [" of a man with black hair, wearing a black shirt, black pants, and a suit.", " a white sofa, chair, and box."], "sample_ids": ["ffbbd798b7e448849b7acd309717739c", "4c59733ebd634594a921b7ace60e4142"], "properties": ["a, shirt, pants", "sofa, chair, box"], "captions_pred_pc": ["a 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of pebbles on a white background 3d illustration of a pile of", "a black and white drawing of a couch with dots"], "captions_pred_image": ["a black and white 3d printed figurine of a man", "a 3d model of a white chair"], "question": "which entity has more furniture", "label": 1}, {"captions": [" a teal and white building structure with metal poles, blue lines, and a hanging ceiling featuring numerous metal rods.", " a wooden shed with a gray roof."], "sample_ids": ["e4133ffb79714c50b70e9d62b4648c39", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["color, material, structure", "roof, color, gray"], "captions_pred_pc": ["a black and white drawing of a room with dots", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of 
dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a table with multiple tables and chairs", "a 3d model of a shed with a gray roof"], "question": "which structure is made of wood", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", "a featuring a rock formation with various statues, including a woman, an eagle, and elements like wood and a shell."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "36d90269173b4d1a84dbd61664593f66"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "a, eagle, wood"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "a black and white illustration of a map with dots all over it"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 
3d", "a 3d model of a person sitting on the edge of a cliff"], "question": "which entity does not feature a rock formation with various statues", "label": 1}, {"captions": ["a featuring a lamp, harp, white bowl, and white curved wall.", "a white of a man with arms outstretched."], "sample_ids": ["55bcec23e1b34f0d9d748b4dcc3ea123", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["lamp, harp, bowl", "image, color, white"], "captions_pred_pc": ["a black and white illustration of a curved line", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d model of a harp in a white room", "a 3d model of a man with his arms outstretched"], "question": "which image is white", "label": 1}, {"captions": [" a gold, silver, and brown shield.", "a low-poly of a white, shattered sphere."], "sample_ids": ["b196502dac494aae8f343bf0e35b71cf", "94119660e7054fc5b7baa68a4e39968c"], "properties": ["color, shape, material", "sphere, color, texture"], "captions_pred_pc": ["a black and white image of a necklace with dots on it", "a black and white illustration of a dandelion on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white 
background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white"], "captions_pred_image": ["a 3d rendering of a black and white wall mounted light fixture", "a 3d model of a cracked egg"], "question": "which entity is a sphere?", "label": 1}, {"captions": ["a featuring a small room with a bunk bed, desk, chair, table, and a blue house.", " a white bunk bed with a ladder."], "sample_ids": ["dd3a9323ed514ccab330973ff9588015", "379f488d0624482694bbe150b7bc1059"], "properties": ["room, bed, desk", "bed, ladder, color, white"], "captions_pred_pc": ["a black and white drawing of a door", "for a white square on a white background with a black square in the center"], "captions_pred_image": ["a 3d model of a small room with a bunk bed", "the bunk bed royalty free 3d model no. 3"], "question": "which bed has a ladder", "label": 1}, {"captions": [" of a white rectangular shelf or light fixture on a gray background.", " a white and blue building with a black roof."], "sample_ids": ["a84221f27ed0416280f8e67563f95ed7", "c893118316ee43e18322e5964b2806c5"], "properties": ["background, color, white", "color, white, blue, roof, black"], "captions_pred_pc": ["a black line on a white background", "a black and white illustration of a person standing on top of a hill made up of tiny dots"], "captions_pred_image": ["a long white plastic strip on a gray background", "a 3d model of a white building on a gray background royalty free 3d model no."], "question": "which entity has a white roof", "label": 1}, {"captions": [" a building with blue metal framing and structure.", " a house with a roof, wooden beams, and chimney."], "sample_ids": ["ce40210c2a7e49dfaebbd934ccec4eca", "be1376023c274bdda995d54f3694157f"], "properties": ["color, blue, structure", "roof, beams, chimney"], "captions_pred_pc": ["a black and white image of dots on a white background", "a black 
and white drawing of a bathroom with a shower"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d model of a house with a roof"], "question": "which building has a roof", "label": 1}, {"captions": [" a white and black horned demon with bunny features.", "a pixelated-textured purple sphere ."], "sample_ids": ["1d3537d1341a423f89990e9b06924904", "fb68393941804e769d5c9b372864a642"], "properties": ["color, horns, features", "texture, color, shape"], "captions_pred_pc": ["above a black and white image of a pair of headphones", "a black and white square made of dots on a white background"], "captions_pred_image": ["a 3d model of a skull with horns on its head", "a 3d model of a gray sphere on a white background"], "question": "which entity is a sphere?", "label": 1}, {"captions": [" of a wooden windmill with a red roof.", "a black and white of a knife/sword with a handle."], "sample_ids": ["2ad8fca30285483d8b7f602fa078215d", "c7692fa635e049bda0a2039fa5a784a4"], "properties": ["roof, color, red", "image, color, black and white"], "captions_pred_pc": ["a white and black image of a snowflake in the shape of a snowflake", "of a black and white knife on a white background"], "captions_pred_image": ["a 3d model of a windmill on a gray background", "a black and white image of a knife"], "question": "which image is black and white", "label": 1}, {"captions": ["yellow and white 3d corn on the cob model resembling a toothbrush.", " of a person in a green outfit, resembling a cartoon character."], "sample_ids": ["1fd0e7ffe26349da89815a6d4d6a189a", "839a873262a544c9906f3f5799ca4648"], "properties": ["color, shape, material", "a, outfit, green"], "captions_pred_pc": ["a black and white illustration of a circle made up of tiny dots", "a person with arms outstretched 3d illustration of a person with arms outstretched on a white background illustration"], "captions_pred_image": ["a 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn 
on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a", "a person jumping on a trampoline in slow motion"], "question": "which entity is a person?", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "bded33af34104b9686b845dfd18309a9"], "properties": ["ceiling, light, desks", "table, staircase, light"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d model of a small table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" of a brick fireplace with a stick protruding, surrounded by brick walls.", " a small village featuring houses, trees, and a winding road."], "sample_ids": ["236ccf2dae264fc3a11cd5be5bc4e280", "7acf46c0265d4e39b97ac084852abde8"], "properties": ["fireplace, stick, surround", "houses, trees, road"], "captions_pred_pc": ["a black and white image of a toothbrush in the shape of a toothbrush", "in 15 words or less a black and white photo of a mountain landscape"], "captions_pred_image": ["a 3d model of a person standing in front of a fireplace royalty free 
3d model preview no.2", "a black and white photograph of a small town"], "question": "which entity has a road?", "label": 1}, {"captions": ["a 3d white rectangular box with multiple compartments and a white lid.", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], "sample_ids": ["966f6e9324a74d90831924895d3f2e8c", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["size, color, shape", "building, plane, room"], "captions_pred_pc": ["a black and white illustration of a patterned rug", "a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white cardboard box on a gray background", "a 3d model of a box with a lot of items inside"], "question": "which entity has more rooms", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", " a red, blue, and green striped tower building."], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "8668f9e9d1a64b86b31f260b8056cd19"], "properties": ["frame, roof, trusses", "color, red, blue, green"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white drawing of a butterfly on a white background a black and white drawing of a butterfly on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d model of a pair of cylindrical towers with a staircase leading up to the top of one of the towers"], "question": "which building is more colorful", "label": 1}, {"captions": [" a yellow and white structure with yellow poles.", " a small building with a yellow roof, featuring a box, a chair, and a plane flying overhead."], "sample_ids": ["bada91e216fd486d9e3e356d48978f33", "a2354f13774340d392fbf33564934aab"], "properties": ["color, shape, poles", "building, roof, yellow"], 
"captions_pred_pc": ["a black and white drawing of a dotted pattern on a white background a black and white drawing of a dotted pattern on a white background royalty free illustration", "a black and white image of a cell phone"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d rendering of a machine with a conveyor belt"], "question": "which entity has a roof that is yellow", "label": 1}, {"captions": [" a robot with a red, bloody head and red eyes, accompanied by a small toy rocket and a white and red spaceship.", " a small house with stairs, a roof, and a pillar."], "sample_ids": ["dd61409988b7464ab025cc1c15f12f43", "a17477b445b3443189dad22f768b888b"], "properties": ["red, eyes, rocket", "roof, pillar, stairs"], "captions_pred_pc": ["of a black and white image of a clock", "a black and white image of a square with dots"], "captions_pred_image": ["a 3d model of a futuristic motorcycle helmet on a white background", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": ["s of a skateboard, snowboard, door, and three pairs of shoes, along with a paper mask and a paper with a hole.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["a267d906e4cf4d36bfe841c6cc9e698b", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["s of, snowboard, shoes, mask", "house, pool, balcony"], "captions_pred_pc": ["above a black and white image of a person standing on a piece of paper", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a black and white image of a surfboard flying through the air", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": [" a house with a green roof and lawn.", "a white of a spaceship and building."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "bf7d4277c9184d35abdec85bd5e25956"], "properties": ["roof, green, lawn", "image, 
building, spaceship"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "a black and white drawing of a tree"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of a white object on a gray background"], "question": "which image shows a spaceship and building?", "label": 1}, {"captions": ["a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box.", " a white sofa, chair, and box."], "sample_ids": ["7e2b63ba4ce24cecacea67dd052016c1", "4c59733ebd634594a921b7ace60e4142"], "properties": ["building, plane, room", "sofa, chair, box"], "captions_pred_pc": ["a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration", "a black and white drawing of a couch with dots"], "captions_pred_image": ["a 3d model of a box with a lot of items inside", "a 3d model of a white chair"], "question": "which entity has a box", "label": 1}, {"captions": [" a small house with a blue roof, a door, and a pool.", " a small bedroom with wooden floors, walls, roof, and shelf."], "sample_ids": ["40c52c2d278345c5b4e8d00a991271dc", "e602ac60041f4b4f84c044161e478781"], "properties": ["door, roof, pool", "floor, wall, roof"], "captions_pred_pc": ["of a black and white photo of a window with fringes", "above a black and white image of a decorative metal bar"], "captions_pred_image": ["a 3d model of a small house", "a 3d model of a room with wooden walls and a rug on the floor"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" a tall glass tower featuring blue, green, and white squares.", " of a white human skull with broken bone elements."], "sample_ids": ["405d68eda26c401fbcddc7e3a457c74e", "6ca0f91b85464d7a845b3977351dd0b5"], "properties": ["color, shape, height", 
"color, white, skull"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "a black and white image of a cat's x-ray"], "captions_pred_image": ["a black and white photograph of a metal sculpture on a pedestal", "a 3d model of a human skull in white"], "question": "which entity is white", "label": 1}, {"captions": [" a white rock with green grass and moss on it.", " a small white house with stairs and a spiral staircase, featuring a white table and ceiling light."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "e9e1cc7fae22458197a61f43a9c355f4"], "properties": ["color, grass, moss", "house, staircase, table"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "above a black and white photograph of a dog in a frame"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a 3d model of a small house with a spiral staircase"], "question": "which entity is a building?", "label": 1}, {"captions": ["a featuring a boat, table, chairs, umbrella, and solar panel.", "a white of a man with arms outstretched."], "sample_ids": ["0f0eb3a198d341d28f809b6d7634be8a", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["boat, table, chairs, umbrella, solar panel", "image, color, white"], "captions_pred_pc": ["a black and white illustration of a boat with an umbrella", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d model of a boat, a table, chairs, and an umbrella", "a 3d model of a man with his arms outstretched"], "question": "which image is black and white?", "label": 1}, {"captions": [" a large house/building structure with a roof.", " a small house with a pink and blue roof and a white ceiling featuring wooden beams."], "sample_ids": ["82859e4c6d4e4bbea94b6252bef1d398", "443554d4d7044c66aa8cbff63c737589"], "properties": ["roof, structure, house", "roof, color, pink, blue"], "captions_pred_pc": ["a black and 
white photograph of a metal sculpture", "a black and white square with dots all over it"], "captions_pred_image": ["a 3d model of a large white structure", "a 3d model of a roof with a triangular design royalty-free 3d model preview no.2"], "question": "which structure has a roof", "label": 1}, {"captions": [" a small house with a roof.", " a small building with windows and a roof."], "sample_ids": ["9578e8de15ec44ce802072aaa4df3910", "0ef2cac27e364c0687afae7ab5040cc3"], "properties": ["roof, house, small", "roof, windows, building"], "captions_pred_pc": ["above a black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and", "a black and white square made up of small dots on a white background"], 
"captions_pred_image": ["a 3d model of a small house", "a 3d model of an apartment building royalty free 3d model preview no 3"], "question": "which building has a roof", "label": 1}, {"captions": [" a wall-mounted shelf with three compartments and vents.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["77df1bccf94249988927d3dda97ae273", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["compartments, wall-mounted, vents", "camera, speaker, ceiling fan"], "captions_pred_pc": ["a black and white drawing of a boat with dots all over it", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a wall mounted air conditioning unit on a gray background royalty free 3d model no.", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker?", "label": 1}, {"captions": [" of a black shark", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["01b98721613b4c0ea23982c048955c1f", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["black, mouth, teeth", "color is white, yellow, plastic"], "captions_pred_pc": ["a black and white image of an object on a white background", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a 3d model of a black shark on a gray background royalty free 3d model preview no.2", "a white plastic container with a label on it"], "question": "which object is not a shark?", "label": 1}, {"captions": ["a 3d white arrow, letter s, toothpick, skateboard, and knife forming a logo.", " a house with a roof, wooden beams, and chimney."], "sample_ids": ["2ee9dcd863514073a849ece8ea7714dd", "be1376023c274bdda995d54f3694157f"], "properties": ["3D, toothpick, skateboard", "roof, beams, chimney"], "captions_pred_pc": ["above a black and white image of a person's hand holding a pencil", "a black and white drawing of a bathroom 
with a shower"], "captions_pred_image": ["a white toothbrush on a gray background", "a 3d model of a house with a roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a white and black horned demon with bunny features.", " a small blue robot with a windmill."], "sample_ids": ["1d3537d1341a423f89990e9b06924904", "5f915b161cc74fedad52d8663b41f87a"], "properties": ["color, horns, features", "color, blue, robot"], "captions_pred_pc": ["above a black and white image of a pair of headphones", "a black and white image of a 3d sculpture"], "captions_pred_image": ["a 3d model of a skull with horns on its head", "a 3d model of a wind turbine"], "question": "which entity is a robot?", "label": 1}, {"captions": ["a small white wooden castle with flags, turrets, and two towers.", " a large steel and metal structure with a pool and scaffolding system."], "sample_ids": ["345b1c7d2d9b4524aa0dcdc3cd27e4da", "5850d5c7223447db816081d50292fec0"], "properties": ["turrets, flags, towers", "structure, material, pool"], "captions_pred_pc": ["a black and white illustration of a square made up of many small dots on a white background", "a black and white drawing of a bridge with chains"], "captions_pred_image": ["a 3d model of a castle with towers and a drawbridge royalty-free 3d model preview no.1", "a 3d model of a large concrete structure"], "question": "which structure is made of metal", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["7907916e8f524df5b16eb566497db83e", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["color, roof, tray", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["a black and white image of a metal object", "a black and white image of a circular object on a white background"], "captions_pred_image": ["a 
3d model of the roof of a building", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": ["a white featuring a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet.", " of a person breaking through a brick wall."], "sample_ids": ["f28f57745af04115b0bacdde80c3b9ee", "0708be2186d24d52815e8ac9d751fc37"], "properties": ["a pair of binoculars, a coffee mug, a box with two handles, a trash can, and a toilet", "image, brick, wall"], "captions_pred_pc": ["a black and white illustration of a pair of sunglasses", "for a black and white illustration of an ice cream cone"], "captions_pred_image": ["a 3d model of a medieval drinking horn 3d model available for 3d printing royalty free 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 3d model no. 
3d", "a 3d model of a person breaking through a brick wall"], "question": "which image shows a person breaking through a wall?", "label": 1}, {"captions": ["a featuring a black and white sailboat, a black and green bird, a triangular lamp, a helicopter, and a kite.", " a futuristic white sports racing car."], "sample_ids": ["a59efdbb28a241b8866bfc094c8c80b2", "95f5d1518f1b4a638f4bc5e444a7e1a1"], "properties": ["a, bird, lamp", "color, white, futuristic"], "captions_pred_pc": ["a silhouette of a person standing in the air", "| all images person 2019 3d model by person | person 2019 3d model by person"], "captions_pred_image": ["a black and white image of a futuristic sculpture", "a 3d model of a futuristic sports car"], "question": "which car is white", "label": 1}, {"captions": ["a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars.", " a small square table with an integrated staircase and a ceiling light."], "sample_ids": ["1b3945962a4b4cda9fe939dc5d63e789", "bded33af34104b9686b845dfd18309a9"], "properties": ["a room, a cake, a table", "table, staircase, light"], "captions_pred_pc": ["a black and white illustration of an object on a white background", "above a black and white image of a square with a square in the center"], "captions_pred_image": ["a 3d rendering of a white room with various items in it", "a 3d model of a small table with a staircase"], "question": "which table has a staircase?", "label": 1}, {"captions": ["a small clay jug with a face, handle, and spout, depicted as a .", " a small wooden house with a green roof."], "sample_ids": ["ceee98c20f23424195da092156905ec4", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["face, handle, spout", "roof, color, green"], "captions_pred_pc": ["a black and white image of a small, circular object", "a black and white map of hawaii on a white background"], 
"captions_pred_image": ["a white ceramic vase with a face on it", "a 3d model of a house with a ladder"], "question": "which entity has a roof", "label": 1}, {"captions": ["a pair of green sneakers with orange laces and star designs.", " a staircase with a railing, table, and chair, featuring a square ceiling light."], "sample_ids": ["8cf3790e236a4d9ebe21b028646792b2", "51a0fba79bce472a8b827b78a110b77f"], "properties": ["color, green, orange, star", "stair, table, chair"], "captions_pred_pc": ["a black and white photo of a pair of shoes", "for a black and white image of a toilet paper holder"], "captions_pred_image": ["a pair of converse sneakers on a white background with stars surrounding the shoes", "a 3d model of a staircase in a room"], "question": "which entity has a table?", "label": 1}, {"captions": ["a 3d object featuring a fish with a long nose and open mouth, a cat head with red, yellow, and green stripes, a frog with a long snout, a white horse with green and yellow stripes, and a wolf mask.", "a victor calculator with a black plastic cover and wall-mounted design."], "sample_ids": ["276699bb0f974c47b4e2954cfcd1651c", "88ffa01f4fc34a8cb3e2a659e9e26125"], "properties": ["a, color, white", "cover, black, plastic"], "captions_pred_pc": ["a black and white image of a skull in the shape of a butterfly", "of a black and white image of a skateboard"], "captions_pred_image": ["a 3d model of an animal with a long nose", "a victor calculator on a white background"], "question": "which object has a black plastic cover", "label": 1}, {"captions": [" a house featuring a purple roof and a suspended ceiling with a light fixture.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["579b43057ef74bd08d06bdd3e8d973a0", "c3a82df41875402285608ef13a55df57"], "properties": ["roof, purple, suspended", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white image of a piece of paper with a pattern 
of dots on it", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of the roof of a building royalty free 3d model no.", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": ["a featuring a table with yellow legs, buildings and houses with yellow columns and roofs, a steel structure with yellow poles, and a suspended ceiling with yellow poles.", " a green mountain with trees and grass."], "sample_ids": ["8a319c41f6214ee78bf11069ec8535ab", "08fb23bdc67b4b0ba5fc64ea5c97e5f7"], "properties": ["yellow, table, roof", "mountain, grass, tree"], "captions_pred_pc": ["a black and white drawing of a floor plan", "in 15 words or less a black ink blot on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white structure with multiple tables and chairs", "a 3d model of a mountain with snow on it"], "question": "which entity has more grass", "label": 1}, {"captions": ["a pair of yellow pliers.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["570d29f10e5b428b91da27cff52bac56", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, yellow, pliers", "a, material, clay"], "captions_pred_pc": ["of a pair of pliers on a white background vector illustration of a pair of pliers on a white background illustration", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a pair of scissors on a white background", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" a small gnome chandelier with three candles, featuring red and blue lights and adorned with pink and blue ribbons.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["6398f4e75a2d480da58396827ac64249", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["- color is red, blue, pink", "a, material, clay"], "captions_pred_pc": ["for a black and white image of 
an object on a white background", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a white chandelier with three light bulbs hanging from the ceiling", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" a green, spiked cube-like creature with eyes and a liquid-like texture.", " a small white house with stairs and a spiral staircase, featuring a white table and ceiling light."], "sample_ids": ["0d2850148a154f3ca72d72817e120759", "e9e1cc7fae22458197a61f43a9c355f4"], "properties": ["texture, spikes, eyes", "house, staircase, table"], "captions_pred_pc": ["a black and white drawing of a flower", "above a black and white photograph of a dog in a frame"], "captions_pred_image": ["a 3d model of an ice cube on a white background royalty-free 3d model preview no. 3", "a 3d model of a small house with a spiral staircase"], "question": "which entity is a building?", "label": 1}, {"captions": [" a staircase with a railing, table, and chair, featuring a square ceiling light.", " a small building with windows and a roof."], "sample_ids": ["51a0fba79bce472a8b827b78a110b77f", "0ef2cac27e364c0687afae7ab5040cc3"], "properties": ["stair, table, chair", "roof, windows, building"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a 3d model of a staircase in a room", "a 3d model of an apartment building royalty free 3d model preview no 3"], "question": "which entity has a roof", "label": 1}, {"captions": [" a house featuring a roof with truss system, framing, insulation, and a ceiling light.", " of a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["39876e69e3914d99a07e0dc59611c5c0", "8cd3f5ff0fc041eca9a480faa6739480"], "properties": ["roof, truss system, framing", "roof, trusses, beams"], "captions_pred_pc": ["a black and 
white drawing of a window with dots all over it", "in 15 words or less a black and white drawing of a window"], "captions_pred_image": ["a 3d model of the roof of a house", "a 3d model of a roof structure"], "question": "which entity has a roof with trusses and beams?", "label": 1}, {"captions": [" of a mushroom cloud with white tree and coral plant elements, featuring blue and green leaves.", "a white and yellow plastic container with a lid and label, resembling a smoke detector or air purifier."], "sample_ids": ["bca5233d878e4cf09b5bc2bb6f3915b0", "ef8288c9fdfc4e0f9c1fe25d570a104e"], "properties": ["color, shape, texture", "color is white, yellow, plastic"], "captions_pred_pc": ["a black and white image of a square with dots on it", "a black and white image of a metal bowl with dots"], "captions_pred_image": ["a 3d model of a tree made out of white flowers", "a white plastic container with a label on it"], "question": "which entity is white and yellow?", "label": 1}, {"captions": [" a destroyed house and plane amidst a town with buildings.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["0fd3ddca09194b8f94ef731af3b64a08", "7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["house, plane, town", "water, boat, rock"], "captions_pred_pc": ["above a black and white drawing of a piece of paper", "a black and white illustration of a surfboard"], "captions_pred_image": ["a 3d model of a damaged building", "a 3d image of an animal laying on the ground"], "question": "which entity has a boat?", "label": 1}, {"captions": [" a futuristic white sports racing car.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": ["95f5d1518f1b4a638f4bc5e444a7e1a1", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["color, white, futuristic", "a room, a cake, a table"], "captions_pred_pc": ["| all 
images person 2019 3d model by person | person 2019 3d model by person", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a 3d model of a futuristic sports car", "a 3d rendering of a white room with various items in it"], "question": "which entity has a table?", "label": 1}, {"captions": ["a white teddy bear, cloud, skull, octopus, and bird in various positions on a gray background.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["dd5849aced0443b1b4b38d413f7e06c4", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["background, color, white", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black and white image of a cat's head", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d model of an animal skull in white on a gray background", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a destroyed house and plane amidst a town with buildings.", " a castle on an island with a small floating house, trees, and clouds."], "sample_ids": ["0fd3ddca09194b8f94ef731af3b64a08", "c4c09479570943e2845fbd4c6a450568"], "properties": ["house, plane, town", "castle, island, house"], "captions_pred_pc": ["above a black and white drawing of a piece of paper", "above a black and white illustration of a group of dots in the shape of a circle"], "captions_pred_image": ["a 3d model of a damaged building", "a 3d model of a small house on an island"], "question": "which entity has a house on an island?", "label": 1}, {"captions": ["a 3d white box with black trim, stripes, and handles.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["55b26130f1514032be078e13fd982905", 
"9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["color, shape, material", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black and white drawing of a square made up of dots", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d model of a white box with a black handle", "a 3d rendering of a plastic box with several compartments"], "question": "which box is made of plastic", "label": 1}, {"captions": [" a large building with a roof and windows.", " a multicolored metal building structure with a roof."], "sample_ids": ["32d1fbd3ee91426882290305f70021e6", "22483891fd124baca3bbc6a6a49adc9c"], "properties": ["roof, windows, building", "color, roof, structure"], "captions_pred_pc": ["of a black and white photo of a diamond buckle", "of a black and white photo of a bike on a white background"], "captions_pred_image": ["a 3d model of an apartment building royalty free 3d model preview no.2", "a 3d model of a barn structure"], "question": "which structure has a roof", "label": 1}, {"captions": [" a blue and white rifle with a blue handle.", " a wooden staircase with a red and white railing and a red arrow."], "sample_ids": ["c0fd39c5633a4bfc8eae7b88bd667ce4", "bb8bb4c9972d4b718b8bbd3ed5fdd14d"], "properties": ["color, blue, white, handle", "arrow, red, white"], "captions_pred_pc": ["a black and white image of a needle on a white background", "above a black and white image of a square with a white cross in the center"], "captions_pred_image": ["a black and white 3d model of a gun", "a 3d model of a spiral staircase"], "question": "which object has a red arrow?", "label": 1}, {"captions": [" a small house with a tree, pool, and pond in a green and blue landscape.", " a small house with a pink and blue roof and a white ceiling featuring wooden beams."], "sample_ids": ["e22489ba182f45cb81f0a83f22abe9bd", "443554d4d7044c66aa8cbff63c737589"], "properties": ["house, tree, pool", "roof, 
color, pink, blue"], "captions_pred_pc": ["in 15 words or less a black and white image of a square with dots around it", "a black and white square with dots all over it"], "captions_pred_image": ["a 3d model of a house and a tree in a box royalty free 3d model preview no.3", "a 3d model of a roof with a triangular design royalty-free 3d model preview no.2"], "question": "which house has a pink and blue roof?", "label": 1}, {"captions": [" a white plastic box/tray with a hole in the middle.", " a destroyed car with rusted, broken metal and torn paper."], "sample_ids": ["04f8bfad8ad14795aced8a83ea30ca60", "3fe31c3bf5cd4574a8ca02222411a988"], "properties": ["color is white, material is plastic, shape is box", "metal, rusted, paper"], "captions_pred_pc": ["a black and white image of a rectangular tray on a white background", "a black and white drawing of a person sitting in a chair"], "captions_pred_image": ["a 3d model of a white plastic tray", "a black and white image of a piece of debris on the ground"], "question": "which object is made of metal", "label": 1}, {"captions": ["white cube", " a clear glass table with metal legs and balls on top."], "sample_ids": ["7ddfa25dc0d448c390b2227ed83616cc", "7c2bfa826f274377ac21f48d510848c3"], "properties": ["color is white, shape is cube, material is plastic", "glass, metal, balls"], "captions_pred_pc": ["a black and white dotted square on a white background", "a black and white image of a wine glass"], "captions_pred_image": ["a 3d model of a white box on a gray background", "a clear acrylic foosball table"], "question": "which object is made of glass", "label": 1}, {"captions": ["a white ceramic vase with the words 'happy' and 'fish' written on it.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["243cd2c469984313b1522dca099eefd3", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["color, white, material, ceramic", "shape is cylindrical, color is silver, material is 
metal"], "captions_pred_pc": ["a black and white image of a vase in the shape of a fish", "a black and white image of a cone shaped object"], "captions_pred_image": ["a white vase on a grey background", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": ["a floor lamp with a pleated white and beige shade and a wooden base.", "a black floor lamp with a wooden round base, black shade, and attached cord."], "sample_ids": ["1ea7bb08894243f5b1f1200ee35fb40a", "03db87a02e3847f8a712a362de9a314d"], "properties": ["base material, wood, shade color, white, beige", "Base material is Wooden, Shade color is Black, Cord color is Black"], "captions_pred_pc": ["of a 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background 3d model of a fan blade on a white background", "a black and white illustration of a bomb on a white background"], "captions_pred_image": ["a floor lamp with a white shade and a chrome base", "a black floor lamp with a round shade on top"], "question": "which lamp has a black cord?", "label": 1}, {"captions": [" of a wooden windmill with a red roof.", " a floating small island with trees, grass, and a mountain."], "sample_ids": 
["2ad8fca30285483d8b7f602fa078215d", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["roof, color, red", "island, mountain, grass"], "captions_pred_pc": ["a white and black image of a snowflake in the shape of a snowflake", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d model of a windmill on a gray background", "a 3d image of a small island in the middle of a lake"], "question": "which entity has a mountain?", "label": 1}, {"captions": ["a 3d collection featuring a black and white motorcycle helmet, a white plastic cup with a black lid, a cow's head, a white face mask, a white helmet, a white and black earphone, and a black and white chair.", " a house with roof trusses and wooden beams on a suspended ceiling."], "sample_ids": ["06a5097b2bcf4192b8b881e04a74a580", "3c2e3a3670b042069bd8290e2c357702"], "properties": ["color, white, black, white", "roof trusses, beams, suspended ceiling"], "captions_pred_pc": ["a black and white image of a toothbrush on a white background", "above a black and white drawing of a building"], "captions_pred_image": ["a 3d illustration of a black and white cow's head on a white background", "a 3d model of a house with a roof in progress royalty free 3d model preview no. 
1"], "question": "which entity has a suspended ceiling", "label": 1}, {"captions": [" a futuristic black and gold spaceship with a gun on it.", "a featuring a teddy bear on a rainbow, accompanied by a baby, a cat, a dog, and a drink, with a pink and green sign displaying \"close the sky over dreams.\""], "sample_ids": ["6c34eb48b0c44667864a2af3fed92d6c", "80dfbe37b3d74f11b712ca1ad6570f70"], "properties": ["color, shape, gun", "image, color, pink"], "captions_pred_pc": ["above a black and white photograph of a fighter jet", "above a black and white photograph of a dog in a bowl"], "captions_pred_image": ["a 3d model of a futuristic space fighter", "a 3d sculpture of an animal on a piece of paper"], "question": "which entity is a picture of a rainbow?", "label": 1}, {"captions": [" a pink and gold robotic woman's hand with purple flowers.", "a black and gold cylindrical object with a gold handle, resembling a cigarette lighter, lamp, and pen."], "sample_ids": ["accfe13a2bd448828e45c790118e1b7d", "c9b1c89380e947f58aa06eb56c93c6d8"], "properties": ["color, hand, flowers", "- color is black and gold- shape is cylindrical- material is metal"], "captions_pred_pc": ["a black and white illustration of a hand", "a black and white image of a circular object on a white background"], "captions_pred_image": ["a 3d model of a human leg with a broken ankle royalty-free 3d model preview", "a 3d model of a black and white object on a gray background"], "question": "which object is made of metal", "label": 1}, {"captions": ["a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box.", "a featuring a white box with a hole, a paper clip, and a lock."], "sample_ids": ["7e2b63ba4ce24cecacea67dd052016c1", "839e143bb1684aaa955f2c3e0cf4eef2"], "properties": ["building, plane, room", "box, paper clip, lock"], "captions_pred_pc": ["a black and white image of a rectangular frame with dots on a white background a black and white image of 
a rectangular frame with dots on a white background royalty free illustration", "above a black and white image of a clock in the shape of a spiral"], "captions_pred_image": ["a 3d model of a box with a lot of items inside", "a 3d model of a stapler with a staple in it royalty free 3d model no."], "question": "which entity has a lock?", "label": 1}, {"captions": [" a large stone bowl with a hole in it.", " a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole."], "sample_ids": ["33dda0d266df43f98145f728f6f49291", "b16fb21cda9a4a21a024df749c2304f4"], "properties": ["size, material, shape", "roof, ceiling, hole"], "captions_pred_pc": ["a black and white illustration of a dotted circle on a white background", "a black and white image of a square with dots on it"], "captions_pred_image": ["a ceramic bowl with a hole in the middle", "a 3d model of a small house and a tree in the foreground"], "question": "which entity is a building?", "label": 1}, {"captions": ["a featuring a rock formation with various statues, including a woman, an eagle, and elements like wood and a shell.", " a potted christmas pine tree."], "sample_ids": ["36d90269173b4d1a84dbd61664593f66", "460c8f3034a844159826fac3b8aa35a5"], "properties": ["a, eagle, wood", "a, color, green"], "captions_pred_pc": ["a black and white illustration of a map with dots all over it", "a black and white illustration of a snowflake on a white background"], "captions_pred_image": ["a 3d model of a person sitting on the edge of a cliff", "a 3d model of a christmas tree in a vase"], "question": "which object is greener", "label": 1}, {"captions": [" a black and white box-like object with various interpretations, such as a coffee table, building, book, and ceiling fixture.", "a featuring a room with various objects, a display stand, a cake with a slice, a table with a cup of coffee, a building with a rainbow, a sand dune with a sun, and a ceiling light with stars."], "sample_ids": 
["404d7e2cd8894c31bdda02d2b3196464", "1b3945962a4b4cda9fe939dc5d63e789"], "properties": ["black, white, coffee table", "a room, a cake, a table"], "captions_pred_pc": ["a black and white drawing of a square with dots on it", "a black and white illustration of an object on a white background"], "captions_pred_image": ["a black and white 3d model of a building", "a 3d rendering of a white room with various items in it"], "question": "which entity has a room?", "label": 1}, {"captions": [" a staircase with a railing, table, and chair, featuring a square ceiling light.", " a small house featuring a wooden floor, stairs, a bathroom with a sink and toilet, and a white box on a table."], "sample_ids": ["51a0fba79bce472a8b827b78a110b77f", "73f2780847f14547b9ae5f9e8a81e348"], "properties": ["stair, table, chair", "floor, stairs, bathroom"], "captions_pred_pc": ["for a black and white image of a toilet paper holder", "a black and white image of a leopard print pattern"], "captions_pred_image": ["a 3d model of a staircase in a room", "a 3d model of a small table"], "question": "which entity has a bathroom?", "label": 1}, {"captions": [" a small green chair with a slanted back and white base.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["7f93c12cbbc74e579d5f0430cfa0010f", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["color, white, base, slanted", "mountainous, landmass, state"], "captions_pred_pc": ["above a black and white drawing of a chair", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d model of a chair", "a 3d model of a piece of paper"], "question": "which entity is not a state?", "label": 1}, {"captions": [" a white table with grey legs, a white top, writing on it, and three legs.", " a house with a roof and beams."], "sample_ids": ["68e0d097351843a3980421f2ae624c59", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["white, top, writing", "roof, beams, house"], 
"captions_pred_pc": ["a group of black dots on a white background stock illustration a group of black dots on a white background royalty free illustration", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a table with writing on it and a mouse on top of it", "a 3d model of a building with a roof"], "question": "which entity has a roof and beams", "label": 1}, {"captions": ["a featuring a bowl of soup, a brush, a knife, a slice of pizza, a torn piece of paper, and a roof with a hole in it.", " a wooden billiard table with legs."], "sample_ids": ["db19e46828c94b1a8b9a6ca9f673c604", "b20ad62516fa467ba6e8de063998e8e4"], "properties": ["a, roof, soup", "legs, material, wood"], "captions_pred_pc": ["in 15 words or fewer a black and white illustration of the moon", "a black and white drawing of a rectangular shaped object"], "captions_pred_image": ["a 3d model of a bowl and chopsticks on a sheet of paper", "a black and white image of a pool table"], "question": "which object has legs", "label": 1}, {"captions": ["a black motorcycle helmet with a face mask and visor.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["ad6df43a2ce24edfb15f5bb64755ed0d", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, black, visor", "a, material, clay"], "captions_pred_pc": ["a black and white circular shape made up of many small dots on a white background a black and white circular shape made up of many small dots on a white background illustration", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a black motorcycle helmet with a visor on top", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, {"captions": [" of a house with a roof truss, chimney, and suspended ceiling.", " a house with a wooden-framed roof structure."], "sample_ids": ["9401dfc901b2447a9c0eb27da56854d7", 
"ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["roof truss, chimney, suspended ceiling", "roof, material, wood"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a bird in a cage on a white background illustration of a", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a house with a roof", "a 3d model of a building with a roof"], "question": "which roof is made of wood", "label": 1}, {"captions": [" a bridge structure with a pier, stairway, railings, and red and green elements.", "a low-poly of a white, shattered sphere."], "sample_ids": ["23701a9674204ea881fb8af9914b1924", "94119660e7054fc5b7baa68a4e39968c"], "properties": ["color, pier, railings", "sphere, color, texture"], "captions_pred_pc": ["a black and white illustration of a computer keyboard", "a black and white illustration of a dandelion on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background 
illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white background illustration of a dandelion on a white background, isolated on a white"], "captions_pred_image": ["a 3d model of a staircase in the middle of a field", "a 3d model of a cracked egg"], "question": "which entity is a sphere?", "label": 1}, {"captions": [" of a small camera, black stereo system, speaker, and ceiling fan.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["c6cef5b8f3a741e4a619e3441bee54d5", "c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["camera, speaker, ceiling fan", "camera, speaker, ceiling fan"], "captions_pred_pc": ["for a black and white image of an object on a white background", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a vintage camera royalty free 3d model preview no 3", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has a speaker and a ceiling fan?", "label": 0}, {"captions": [" a large metal bomb.", "a 3d red and white santa claus teddy bear with a flying plane and floating hat."], "sample_ids": ["00c6c9da8a3943e8b096c1a3134d1d8d", "0797e97b272c43d59bfeb5821421c5eb"], "properties": 
["size, material, shape", "hat, color, white"], "captions_pred_pc": ["above a black and white image of a dome shaped object", "a black and white image of a doll made of beads"], "captions_pred_image": ["a 3d model of an industrial ventilation fan", "a black and white photo of a snowflake falling from the sky"], "question": "which object is made of white", "label": 1}, {"captions": [" a large house/building structure with a roof.", " a house with a yard, trees, bushes, and surrounding buildings."], "sample_ids": ["82859e4c6d4e4bbea94b6252bef1d398", "7f8942ef51dd4246993a587a12df168c"], "properties": ["roof, structure, house", "house, yard, surrounding buildings"], "captions_pred_pc": ["a black and white photograph of a metal sculpture", "a black and white image of a truck on a white background"], "captions_pred_image": ["a 3d model of a large white structure", "a 3d model of a house in the middle of a field"], "question": "which house has a yard", "label": 1}, {"captions": [" of a gray stereo system with a blue drawer, resembling a printer and computer with a blue screen, featuring a cd player.", " of a white and brown sea shell with a hole and small pearls on it, resembling a sea urchin."], "sample_ids": ["3d4d965e67744415b69ff6aaeb11f420", "411c164757fc4de68dfecb35fa858223"], "properties": ["color, screen, drawer", "resembles, sea urchin, shell"], "captions_pred_pc": ["above a black and white image of a brush", "in 15 words or less a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a 
dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a white background illustration of a black and white illustration of a dandelion on a"], "captions_pred_image": ["a 3d model of a printer on a white background royalty free 3d model no.2", "a 3d model of a sea urchin"], "question": "which entity is a shell?", "label": 1}, {"captions": [" of a wooden tv stand with two drawers and handles.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["6409380e790442e6a5733eb447d4c510", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["Drawer, Handle, Wood", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white drawing of a line of dots on a white background", "a black and white image of a cone shaped object"], "captions_pred_image": ["a 3d rendering of a wooden entertainment center", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": ["s of a rock, boat, plane, and leaf on a stick.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["be0884a7ced34b3d92687b6087798a1e", "3cd410c4359a4cef98702963a2b9802b"], "properties": ["s, stick, leaf", "roof, trusses, ladder"], "captions_pred_pc": ["above a black and white drawing of an object floating in the sky", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a black and white photograph of a rock on a sandy surface", "a 3d model of the roof of a building"], "question": "which entity has a roof with wooden trusses and a 
ladder?", "label": 1}, {"captions": ["a featuring a snake, an eel, a cube, a sword, a dice, a stick, and a blue ball.", "a featuring a fireplace with a broom, shovel, and other items, a man and a dog, a table with a bucket, and a vase with a pipe."], "sample_ids": ["7940570f74c14baa83ebf5f50cd2a720", "fdcbb46224a44faca5c3fb20264eb2e7"], "properties": ["a snake, an eel, a cube, a sword, a dice, a stick, a blue ball", "broom, shovel, fireplace"], "captions_pred_pc": ["a black mascara brush on a white background", "a close-up view of a white plastic bag with a small hole in it"], "captions_pred_image": ["a 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife and a cube on a white background 3d model of a knife", "a 3d model of an outdoor fireplace"], "question": "which entity is a fireplace?", "label": 1}, {"captions": [" a blue and white building structure with a table and suspended ceiling.", " a small white archway structure resembling a building."], "sample_ids": ["fc63507c452c4a4b89f614f2b8bff76a", "5ad02458cf394134a902e25001d2ffef"], "properties": ["color, table, ceiling", "structure, building, archway"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a room", "for a black and white 
illustration of a castle on a hill"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a 3d rendering of a white object on a white surface"], "question": "which structure is white", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["a box, a cup, a bottle, a jar", "island, mountain, grass"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d image of a small island in the middle of a lake"], "question": "which entity has grass", "label": 1}, {"captions": [" a small house with a pond and situated on a rock.", " a small house with a tree in front, situated on a hill."], "sample_ids": ["92859eb82a344134806b37cc209927c6", "3bde44b5f10946398f1bb9843dc14caa"], "properties": ["house, rock, pond", "house, tree, hill"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a toaster", "a black and white photo of a cell phone in a puddle of water"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a house in the middle of a field"], "question": "which house is situated on a hill?", "label": 1}, {"captions": [" a house with a roof structure and toothbrushes.", " a white and gold mirror on a wooden easel stand."], "sample_ids": ["7632d1ba4e8144c19484c263b6074d0c", "0d10d734448d4a5d8d07b938c12d9d80"], "properties": ["house, roof, toothbrushes", "color, white, gold"], "captions_pred_pc": ["a black and white illustration of the letter 'b' isolated on a white background illustration", "for a black and white image of a shoe on a white background"], "captions_pred_image": ["a 
3d rendering of a white box with a lot of blades", "a 3d model of a standing mirror on a white background"], "question": "which object is white and gold?", "label": 1}, {"captions": [" a futuristic black and gold spaceship with a gun on it.", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], "sample_ids": ["6c34eb48b0c44667864a2af3fed92d6c", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["color, shape, gun", "building, plane, room"], "captions_pred_pc": ["above a black and white photograph of a fighter jet", "a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a futuristic space fighter", "a 3d model of a box with a lot of items inside"], "question": "which entity has a room?", "label": 1}, {"captions": [" of a white sculpture, resembling a horse and paper plane, on a gray background.", " featuring a moss-covered rock, mushroom in grass, and green leaves with a brown spot."], "sample_ids": ["179b4438edfc4a43a27a83784f38ff4b", "34ebe81ae93841ca829efd15aee4d8c1"], "properties": ["color, background, white", "moss, mushroom, grass"], "captions_pred_pc": ["above a black and white image of a sculpture in the shape of a bird", "for a black and white illustration of a cloud on a white background"], "captions_pred_image": ["a 3d printed sculpture of a horse's head on a gray background", "a 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a 
cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor in the lymphatic system 3d model of a cancerous tumor"], "question": "which entity has a brown spot?", "label": 1}, {"captions": ["a white of a building with columns, stairs, and railings.", " an old building with windows, doors, and a balcony on a street."], "sample_ids": ["c9ad30f336844b629cb237fa5b0d94f2", "706fb93f885d42f594e0ebbba632d2f2"], "properties": ["image, building, stairs", "building, balcony, street"], "captions_pred_pc": ["a black and white image of dots on a white background a black and white image of dots on a white background royalty free illustration", "in 15 words or less a black ink brush stroke on a white background"], "captions_pred_image": ["a 3d model of a multi-level building with stairs and balconies royalty-free 3d model no.", "a 3d model of an old building"], "question": "which building has a balcony", "label": 1}, {"captions": [" a small building with stairs and a glass floor, featuring a square table and a black square ceiling light.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["8aaad713b8834739b008ccf2f3d86cce", "c3a82df41875402285608ef13a55df57"], "properties": ["floor, table, light", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["above a black and white photograph of a window", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a black and white 3d model of a staircase on a platform", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a destroyed car with rusted, broken metal and torn paper.", " a small house with stairs, a roof, and a pillar."], "sample_ids": 
["3fe31c3bf5cd4574a8ca02222411a988", "a17477b445b3443189dad22f768b888b"], "properties": ["metal, rusted, paper", "roof, pillar, stairs"], "captions_pred_pc": ["a black and white drawing of a person sitting in a chair", "a black and white image of a square with dots"], "captions_pred_image": ["a black and white image of a piece of debris on the ground", "a 3d model of a small building with a balcony"], "question": "which entity has a roof", "label": 1}, {"captions": [" the white text \"ekberkaslan\" with a row of white cubes and a numbered box.", " a house featuring a wooden roof structure with trusses and beams."], "sample_ids": ["87ee30d475f34b799c24bf7ef3a7b540", "97e000ff41094665afd94ea565da8b13"], "properties": ["- color is white- shape is cubes- number is 1", "roof, material, wood"], "captions_pred_pc": ["a close up of a black and white striped scarf", "a black and white drawing of a floor plan"], "captions_pred_image": ["a 3d image of the word ebercaskalan on a white background", "a 3d model of the roof of a building"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" a house featuring a green roof and red frame.", " a house featuring a pink-purple roof with trusses and wooden ceiling beams."], "sample_ids": ["00d9a408067d46afa127a404f63b4f65", "b6b6a3f82bdd47c3afaf9af885ba8703"], "properties": ["color, roof, green, frame, red", "roof, trusses, beams"], "captions_pred_pc": ["a black and white illustration of a house made of dots", "a black and white pattern of dots in the shape of a square royalty free illustration"], "captions_pred_image": ["a 3d model of a building with a metal roof", "a 3d model of the roof of a building"], "question": "which roof is made of trusses", "label": 1}, {"captions": ["a featuring a white and blue structure with a table, blue blocks, and suspended blue cubes.", " a white and gold mirror on a wooden easel stand."], "sample_ids": ["1d2cfe3a03004b62b17d3ce065658302", 
"0d10d734448d4a5d8d07b938c12d9d80"], "properties": ["color, table, blocks", "color, white, gold"], "captions_pred_pc": ["a group of people standing on top of each other on a white background a group of people standing on top of each other on a white background royalty free illustration", "for a black and white image of a shoe on a white background"], "captions_pred_image": ["a 3d model of a table with four legs", "a 3d model of a standing mirror on a white background"], "question": "which object is white and gold?", "label": 1}, {"captions": [" a spacious office featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " a small house with a staircase, balcony, and wooden floor."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "e67e211004cb450cbaf8139dd74ba39b"], "properties": ["ceiling, light, desks", "floor, staircase, balcony"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "a black and white drawing of a wallet"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d model of a bench on a wooden floor"], "question": "which entity has a wooden floor", "label": 1}, {"captions": [" of a white crocodile", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["525e4c1c86564ea287acbe93397d6d91", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["color, shape, size", "island, terrain, water"], "captions_pred_pc": ["a black and white image of a bird flying in the sky", "a black and white photograph of a piece of paper with dots on it"], "captions_pred_image": ["a 3d model of a piece of white paper", "a 3d image of a small island in the middle of a lake"], "question": "which entity is a small island?", "label": 1}, {"captions": [" a colorful sunburst with pink, blue, and purple hues.", " of a small camera, black stereo system, speaker, and ceiling fan."], "sample_ids": ["46946704a6344eb18718fc5710782f6b", 
"c6cef5b8f3a741e4a619e3441bee54d5"], "properties": ["color, hue, saturation", "camera, speaker, ceiling fan"], "captions_pred_pc": ["of a black and white photo of a ceiling light fixture", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunburst light fixture 3d model of a sunbur", "a 3d model of a vintage camera royalty free 3d model preview no 3"], "question": "which entity has more speakers", "label": 1}, {"captions": ["a white 3d-printed ring featuring a fish design and the word \"defia,\" accompanied by a white pen and logo.", "a white of a woman with her arms outstretched."], "sample_ids": ["9a15d8285e614fb4b7d1cb7076a7b56a", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["color, white, pen, logo", "image, color, white"], "captions_pred_pc": ["of a black and white photo of a pair of earrings", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a white toothbrush on a gray background", "a 3d model of a woman with her arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": [" a purple sphere with polka dots, flowers, and a hole, containing a pink ball inside.", " a wooden door with a lock, handle, and a piece of paper on it."], 
"sample_ids": ["1000e3065aaa4d6fb93cea89b99e1748", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["color, shape, material", "door, lock, handle"], "captions_pred_pc": ["a black and white image of a dotted circle on a white background royalty free illustration", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of an object with a ball in the center", "a black and white image of a door with a crack in it"], "question": "which entity is made of wood", "label": 1}, {"captions": [" a small wooden house.", " a large steel building with many columns and a pool."], "sample_ids": ["4cb4dba1237443eb8dc299530fa12521", "2ce649a4152a45bab60d8cafa1dcdeb3"], "properties": ["house, material, wood", "building material, pool, steel"], "captions_pred_pc": ["a house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots on a white background house made of dots", "a black and white drawing of a 
room with dots"], "captions_pred_image": ["a 3d model of a small cottage", "a 3d model of a concrete structure"], "question": "which building is made of steel", "label": 1}, {"captions": [" of a wooden stand featuring two legs, six balls, and a row of teddy bears, resembling a puzzle toy, game board, and bookshelf.", " of a white shelf with four legs and hooks, accompanied by a ceiling light fixture."], "sample_ids": ["e36ba9c060cd49f48a0acc1790fcf049", "5017581716c7402581a82ebf08d427a3"], "properties": ["resembles, toy, bookshelf", "Shelf, color, white"], "captions_pred_pc": ["a black and white image of a book cover", "a black and white doormat on a white background stock illustration \u00a9 2018 iStock"], "captions_pred_image": ["a 3d rendering of a wooden display stand with six cups on it", "a 3d rendering of a white shelf"], "question": "which entity is a shelf?", "label": 1}, {"captions": ["a collection featuring a broken egg in a bowl, food wrapped in paper, torn and whole paper pieces, a white paper hat, a skull with a hat, a snowy iceberg, and a piece of cake.", " a mouse wearing a top hat and a teddy bear holding a spoon."], "sample_ids": ["3d7bd392c9a14f4ab7c0aae2cf75b487", "887e410d07854396b563325ae1929583"], "properties": ["hat, food, bowl", "hat, mouse, bear"], "captions_pred_pc": ["in 15 words or less a black and white image of dots on a white background", "a black and white illustration of a snowflake on a white background"], "captions_pred_image": ["a 3d model of the earth with a hole cut out of it", "a 3d model of a mouse wearing a top hat and bow tie"], "question": "which entity has a hat", "label": 1}, {"captions": [" a blue circuit board with electronic components.", " of a toy mushroom character with a white and brown head."], "sample_ids": ["4816a2780af54492b6692fd78347f1ac", "ae8a73809d4647c09cc82f403e47de1d"], "properties": ["color, blue, components", "color, head, white and brown"], "captions_pred_pc": ["a black and white drawing of a 
person sitting on a bench", "a black and white illustration of a butterfly sitting on a dandelion stock illustration"], "captions_pred_image": ["a 3d printed circuit board with various electronic components", "a 3d model of a gray and white cartoon character"], "question": "which entity is a toy?", "label": 1}, {"captions": [" a small white building with a door, resembling a box-shaped house.", "a black and white of a knife/sword with a handle."], "sample_ids": ["1b5fe88d0ff149ae9d8b4eb455c5c90c", "c7692fa635e049bda0a2039fa5a784a4"], "properties": ["shape is box, color is white, door is present", "image, color, black and white"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "of a black and white knife on a white background"], "captions_pred_image": ["a 3d model of a white, open shelving unit", "a black and white image of a knife"], "question": "which entity is not a black and white image?", "label": 1}, {"captions": ["a white 3d-printed plastic container set with a lid, two small containers, and a hat-like attachment.", "a 3d wooden toy on a wooden plate with a piece of fruit and bread with a knife."], "sample_ids": ["1da865c75a5e4a57a17652975dae5474", "1c389c8f46b345838e515b9747c1f982"], "properties": ["color, white, plastic", "plate, fruit, knife"], "captions_pred_pc": ["a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a 
white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a black dots on a white background a", "a black and white illustration of a hedgehog"], "captions_pred_image": ["a 3d model of a white box, a white lid, and a white container", "a person riding a skateboard on a wooden surface"], "question": "which object is made of wood", "label": 1}, {"captions": [" of a small white box or bench.", "a featuring a house with a fence, tennis court, playground with slide, and buildings with pipes, wires, and colored lines."], "sample_ids": ["dfc7c96b45b74328bce7418560fe76fe", "c516e491e5ee4313a4c06365ef13af3f"], "properties": ["white, bench, small", "house, fence, playground"], "captions_pred_pc": ["above a black and white illustration of a person standing on top of a skyscraper", "above a black and white drawing of an industrial machine"], "captions_pred_image": ["a 3d rendering of a white object on a white surface", "a 3d model of a room with a lot of wires"], "question": "which entity has a fence", "label": 1}, {"captions": [" a house with a roof structure and toothbrushes.", " a black and white cube-shaped building with a staircase."], "sample_ids": ["7632d1ba4e8144c19484c263b6074d0c", "587e65f2d904440488a98dfa9a4e9dbe"], "properties": ["house, roof, toothbrushes", "shape is cube, color is black, white"], "captions_pred_pc": ["a black and white illustration of the letter 'b' isolated on a white background illustration", "above a black and white photograph of a sculpture"], "captions_pred_image": ["a 3d rendering of a white box with a lot of blades", "a black and white 3d model of a building"], "question": "which building is a cube?", 
"label": 1}, {"captions": ["a 3d collection featuring a cash register, destroyed car, pos machine with credit card machine, broken cell phone, black and blue phone, atm machine, crocodile's head, and broken roof.", " a small wooden house with a green roof."], "sample_ids": ["d9681d1f6fad42ab8d498cba24339ca8", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["pos machine, credit card machine, cash register", "roof, color, green"], "captions_pred_pc": ["a black and white illustration of a glass bottle", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a vintage cash register sitting on top of a table", "a 3d model of a house with a ladder"], "question": "which entity has a green roof", "label": 1}, {"captions": [" a small house with a red roof.", " of two rocks with ice elements."], "sample_ids": ["085db9059b744673b5623b5338e02196", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["roof, red, house", "image is a rock with ice elements"], "captions_pred_pc": ["a black and white dotted square on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a small shed in the snow", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a blue and white cube with a hole in one wall, resembling a room or building.", " a blue and green penguin."], "sample_ids": ["5c04dac989a64d228283687588484b93", "50e0e11060df4c0db6a44daf3f32639d"], "properties": ["color, shape, material", "color, blue, green"], "captions_pred_pc": ["a 3d model of a ribbon with dots on it 3d model of a ribbon with dots on it, isolated on a white background royalty free illustration", "a black and white image of a ball with dots on it"], "captions_pred_image": ["a 3d model of a brick wall with a hole in it", "a 3d model of a penguin sitting down"], "question": "which entity is a blue and green penguin?", "label": 1}, {"captions": 
[" a small white building featuring a green drawer, white curved wall, kitchen sink, and windows, resembling a floor plan of a gym.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["b494049bb15440949e465d54a72b2f02", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["building, floorplan, gym", "roof, color, yellow"], "captions_pred_pc": ["above a black and white drawing of a floor plan", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white building with two floors", "a 3d model of a house with a roof"], "question": "which building has a yellow roof", "label": 1}, {"captions": ["a 3d object featuring a head with wires, a bowl of spaghetti, and a scribbled ball.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["9050ebb69503447680d4757dfc708754", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["a, head, bowl", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["in your own words a black and white illustration of a vase filled with beads", "a black and white image of a cone shaped object"], "captions_pred_image": ["a black and white drawing of a bunch of tangled wires", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a building featuring yellow columns, a yellow roof, and a wooden structure.", "a featuring a flying plane, a destroyed plane, a large airborne ship, a building with a broken roof, and a broken piece of metal."], "sample_ids": ["0ce6a4102f4f40e2a0084938b0a93941", "4839e3b998ff4f6a84de50488ffae3ba"], "properties": ["structure, columns, roof", "a, building, roof"], "captions_pred_pc": ["a black and white drawing of a window", "for a black 
and white drawing of a person holding a pencil"], "captions_pred_image": ["a 3d model of a building with multiple levels", "a 3d model of the space shuttle"], "question": "which building has a roof", "label": 0}, {"captions": ["a 3d white rectangular box with multiple compartments and a white lid.", " a wooden object, including a board, piece of wood, box, and shelf."], "sample_ids": ["966f6e9324a74d90831924895d3f2e8c", "c986212445a1466ca7be7b5ac6bea729"], "properties": ["size, color, shape", "wood, board, shelf"], "captions_pred_pc": ["a black and white illustration of a patterned rug", "a black and white drawing of snowflakes on a white background"], "captions_pred_image": ["a 3d rendering of a white cardboard box on a gray background", "a 3d rendering of a piece of marble"], "question": "which object is made of wood", "label": 1}, {"captions": [" a white, curved plastic object resembling an ear, mouth guard, or hat.", "white ceramic urn with a red lid."], "sample_ids": ["c3a82df41875402285608ef13a55df57", "ec15c810a38d4d45a36db910ecb2bcf8"], "properties": ["shape is curved, color is white, material is plastic", "color, white, lid, red"], "captions_pred_pc": ["a black and white drawing of a bird's wing", "a black and white photograph of a vase"], "captions_pred_image": ["a white plastic object on a gray background", "a white ceramic vase sitting on top of a gray surface"], "question": "which object is made of ceramic", "label": 1}, {"captions": ["a white ornate airplane design.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["23ec5bf5dd154e4a9df3194da7b8267a", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, shape, material", "a, material, clay"], "captions_pred_pc": ["of a black and white image of an ornate door knocker", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a decorative ornament on a white background", "a sculpture of a man's head on a pedestal"], "question": 
"which entity is made of clay", "label": 1}, {"captions": [" a green electrical utility box with a warning sign and a small blue box on a concrete base.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["8e39d4766ed4444ea527d6c5ea33a5ef", "b896a0898efe4059a776193c02132129"], "properties": ["color, base, warning", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white illustration of a box with dots on it", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of an electrical box royalty-free 3d model preview no.2", "a 3d model of an ancient statue"], "question": "which object is made of stone", "label": 1}, {"captions": [" a small white building with stairs and a white table.", " a small island featuring a large house, trees, and a village on a hill."], "sample_ids": ["e30374c614f54fdb90f35b96b071349d", "06a1c233fb444830b577aa06e2c01294"], "properties": ["building, stairs, table", "house, tree, hill"], "captions_pred_pc": ["above a black and white drawing of a cat sitting on top of a letter 'e'", "above a black and white image of a surfboard on a white background"], "captions_pred_image": ["a 3d model of a building with a staircase", "a black and white image of a house in the middle of a field"], "question": "which entity has a house?", "label": 1}, {"captions": [" a small island with trees, grass, water, and rocky terrain.", " a house with a wooden-framed roof structure."], "sample_ids": ["d5e708b7549e48e2b02fd6fe9f197ec2", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["island, terrain, water", "roof, material, wood"], "captions_pred_pc": ["a black and white photograph of a piece of paper with dots on it", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d image of a small island in the middle of a lake", "a 3d model of a building with a roof"], "question": "which entity has a roof made of 
wood", "label": 1}, {"captions": [" a black and white cube-shaped building with a staircase.", " a wooden staircase with a red and white railing and a red arrow."], "sample_ids": ["587e65f2d904440488a98dfa9a4e9dbe", "bb8bb4c9972d4b718b8bbd3ed5fdd14d"], "properties": ["shape is cube, color is black, white", "arrow, red, white"], "captions_pred_pc": ["above a black and white photograph of a sculpture", "above a black and white image of a square with a white cross in the center"], "captions_pred_image": ["a black and white 3d model of a building", "a 3d model of a spiral staircase"], "question": "which staircase has a red arrow?", "label": 1}, {"captions": [" a snowy city with buildings and a plane flying overhead.", " a small house on a hill in a field."], "sample_ids": ["cc63ceb2b5e84872a1a1f6423de419e2", "bd873071252047d38160c4a5fdd2c1b7"], "properties": ["building, plane, city", "house, hill, field"], "captions_pred_pc": ["a black and white photo of an airplane on a white background", "a black and white photograph of a piece of paper"], "captions_pred_image": ["a 3d model of a city in black and white", "a black and white image of a small house"], "question": "which entity is in a field?", "label": 1}, {"captions": [" a small white building with stairs and shelves.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["9e1f64d4fd514059be934077717536dc", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["building, stairs, shelves", "roof, structure, greenhouse"], "captions_pred_pc": ["a black and white image of a person standing in front of a white background", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a white 3d model of a building with stairs", "a 3d model of a building with a roof"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" of a cherry blossom bonsai tree with pink 
flowers.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["037fff0f153c41ea8b9c9392c2e2439a", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["flower, color, pink", "roof, color, yellow"], "captions_pred_pc": ["for a black and white illustration of a person on a skateboard", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a bonsai tree on a pedestal", "a 3d model of a house with a roof"], "question": "which entity has a roof that is yellow", "label": 1}, {"captions": [" a wooden-framed house with roof trusses.", "a 3d white cube featuring various text and logos, including \"gypsy stribes,\" \"guinea pig studios,\" \"guillaume pi sodds,\" \"happy studios,\" and \"guillem pie sos.\""], "sample_ids": ["e60dd370c5ec468da4689a801f951157", "5d08c34bfb2c4c9b9538e24d68761331"], "properties": ["frame, roof, trusses", "- material is plastic- color is white- shape is cube"], "captions_pred_pc": ["a black and white drawing of a metal grate", "of a black and white photo of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a house under construction", "a 3d image of a cube with the word 'stories' written on it"], "question": "which entity is not a cube?", "label": 1}, {"captions": [" of a white wall-mounted light switch, electrical outlet, and various furniture pieces.", " a wooden shed with a gray roof."], "sample_ids": ["b195bf7ba6094e1b812e4312deeeb360", "8b32e1ded62144768cd9ca8945fa8524"], "properties": ["light switch, electrical outlet, furniture", "roof, color, gray"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "a black and white drawing of a window in the shape of dots on a white background a black and white drawing of a window in the shape of dots on a white background 
royalty free illustration"], "captions_pred_image": ["a 3d rendering of a room with a white background", "a 3d model of a shed with a gray roof"], "question": "which entity has a roof that is the same color as the shed?", "label": 1}, {"captions": [" of a bearded man wearing a green shirt and a hat.", " a woman in a red dress holding a tennis racket, wearing a hat."], "sample_ids": ["1e4e5e8133ae48c797facaec724c13a5", "b89b19ddadd04d6799e90b611c889bae"], "properties": ["hat, shirt, bearded", "hat, dress, racket"], "captions_pred_pc": ["of a black and white bracelet on a white background", "a black and white illustration of a dendritic cell"], "captions_pred_image": ["a 3d model of a man with a beard", "a black and white photograph of a woman holding a tennis racket"], "question": "which entity is a woman?", "label": 1}, {"captions": ["a white 3d mannequin human head.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["0598fef648c8422f84410847fda77e6a", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["color, white, mannequin", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["a black and white image of a heart shaped ring", "a black and white illustration of a toilet brush and toilet brush holder"], "captions_pred_image": ["a 3d model of a human head on a white background royalty free 3d model preview no.2", "a 3d rendering of a plastic box with several compartments"], "question": "which object is made of plastic", "label": 1}, {"captions": [" of a white building with a small house and a desk with a laptop.", " a small island with trees, grass, water, and rocky terrain."], "sample_ids": ["9244a2d3a9e94c8398ef991f1661bb58", "d5e708b7549e48e2b02fd6fe9f197ec2"], "properties": ["a, desk, laptop", "island, terrain, water"], "captions_pred_pc": ["a black and white image of a piece of furniture", "a black and white photograph of a piece of paper with dots 
on it"], "captions_pred_image": ["a 3d model of an office desk on a white background", "a 3d image of a small island in the middle of a lake"], "question": "which entity has more water", "label": 1}, {"captions": [" a small white box with a shelf and a hole in it.", " a wooden staircase with a red and white railing and a red arrow."], "sample_ids": ["d023ae78bc5a436eaba13c5ecdd45c56", "bb8bb4c9972d4b718b8bbd3ed5fdd14d"], "properties": ["a, hole, shelf", "arrow, red, white"], "captions_pred_pc": ["a black and white drawing of a dotted square on a white background", "above a black and white image of a square with a white cross in the center"], "captions_pred_image": ["a 3d model of a white box on a gray background", "a 3d model of a spiral staircase"], "question": "which entity has a red arrow", "label": 1}, {"captions": [" a human foot with a red, white, and pink bone structure, including a skull with red and white details.", " a small wooden house with a green roof."], "sample_ids": ["39cdff793d3f4dcd898dd6b5222cb289", "912e0b8da1e8496489833d8a8ecffd31"], "properties": ["color, shape, size", "roof, color, green"], "captions_pred_pc": ["a black and white image of an ornate design on a white background", "a black and white map of hawaii on a white background"], "captions_pred_image": ["a 3d model of a human bone structure", "a 3d model of a house with a ladder"], "question": "which entity has a green roof", "label": 1}, {"captions": [" a white rock formation with pebbles and ice elements.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["94d94f5a75de4bd0a43b08609630876e", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["color, shape, texture", "table, staircase, light"], "captions_pred_pc": ["a black and white drawing of a pair of shoes on a white background royalty free illustration", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d model of a small rocky island in the middle of 
a body of water", "a 3d model of a table with a staircase"], "question": "which entity is a table?", "label": 1}, {"captions": [" a modern house with a pool, balcony, and a small white box with a hole in it.", " a large steel building with a pool."], "sample_ids": ["795cebc8a9bd4780aa44c8dbccfd6d1a", "72eed67d8d884c819b28e2e95eb48f06"], "properties": ["house, pool, balcony", "building material, pool, steel"], "captions_pred_pc": ["above a black and white image of a room with a lot of dots", "a black and white illustration of a building with a tree growing out of it"], "captions_pred_image": ["a 3d model of a modern house", "a 3d model of a concrete structure"], "question": "which building has a pool", "label": 1}, {"captions": [" a leather recliner chair and ottoman set, featuring swivel functionality and available in modern orange, tan, and brown colors.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["44be138ae8e2409bbbca44a96fc67d45", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["color, tan, brown, orange", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["above a black and white illustration of an office chair", "a black and white image of a cone shaped object"], "captions_pred_image": ["a grey leather lounge chair with ottoman and footstool", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a white wooden climbing frame with swing set and ladders.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["f2935306c64a479685462220e33e6f3c", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["- material is wood - color is white - height is 1.8 m", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["an illustration of a barbed wire fence", "a black and white image of a cone shaped object"], "captions_pred_image": 
["a 3d model of a playground ladder set royalty free 3d model preview no.2", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": [" a woman in a black dress with wings and shoes, featuring a spider and robot elements.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["745fed115c5343dab9d7f0c389c98902", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["costume, color, black", "island, mountain, grass"], "captions_pred_pc": ["a black and white illustration of a bumblebee on a white background", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d model of a person wearing a futuristic costume", "a 3d image of a small island in the middle of a lake"], "question": "which entity has grass", "label": 1}, {"captions": [" a building with blue metal framing and structure.", " a white and gold mirror on a wooden easel stand."], "sample_ids": ["ce40210c2a7e49dfaebbd934ccec4eca", "0d10d734448d4a5d8d07b938c12d9d80"], "properties": ["color, blue, structure", "color, white, gold"], "captions_pred_pc": ["a black and white image of dots on a white background", "for a black and white image of a shoe on a white background"], "captions_pred_image": ["a 3d model of a building under construction", "a 3d model of a standing mirror on a white background"], "question": "which object is white and gold?", "label": 1}, {"captions": [" a small house with stairs and a roof.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["e9305c80010f4e3b9de9789f01a9bee5", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["roof, stairs, house", "throne, stairs, tree"], "captions_pred_pc": ["above a black and white image of a square with dots all over it", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], 
"captions_pred_image": ["a 3d rendering of a podium on a wooden floor", "a 3d model of a throne with a tree on it"], "question": "which entity has stairs", "label": 1}, {"captions": [" a stone wall featuring carvings and statues.", " a stone wall featuring carvings and statues."], "sample_ids": ["42f663140f834d1ab5f95cd8a5ad04b3", "42f663140f834d1ab5f95cd8a5ad04b3"], "properties": ["carving, statue, wall", "carving, statue, wall"], "captions_pred_pc": ["a black and white image of a snowflake on a white background", "a black and white image of a snowflake on a white background"], "captions_pred_image": ["a 3d image of a group of statues on a wall", "a 3d image of a group of statues on a wall"], "question": "which entity has a statue of a woman?", "label": 0}, {"captions": [" a small white building with a floor plan and ceiling light.", "a featuring a building, a plane, a printing machine, a large machine, a room with a computer, and a large gray box."], "sample_ids": ["1df55bb7035941cc9829aa904e2af065", "7e2b63ba4ce24cecacea67dd052016c1"], "properties": ["floor plan, ceiling light, color", "building, plane, room"], "captions_pred_pc": ["a line of dots on a white background a line of dots on a white background royalty free illustration", "a black and white image of a rectangular frame with dots on a white background a black and white image of a rectangular frame with dots on a white background royalty free illustration"], "captions_pred_image": ["a white 3d model of a house", "a 3d model of a box with a lot of items inside"], "question": "which entity has a room?", "label": 1}, {"captions": [" a crab with long legs, wires, and tentacles, resembling a hybrid of an octopus, squid, spider, and robot.", " a house with a wooden-framed roof structure."], "sample_ids": ["ec5914b53b6a4cde8de4820050bc46c5", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["resembles, octopus, squid, spider, robot", "roof, material, wood"], "captions_pred_pc": ["a black and white 
illustration of a jellyfish", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a robotic octopus on a white background royalty free 3d model preview no.2", "a 3d model of a building with a roof"], "question": "which entity has a roof made of wood", "label": 1}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", "a black rock with writing on it."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "f9e35785655f464d8384a8d562de4ba2"], "properties": ["color, red, blue, structure", "color, black, writing"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "a black and white image of a sphere with dots on it"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a black and white photograph of a rock with writing on it"], "question": "which object is black", "label": 1}, {"captions": [" a white hospital operating room with blue containers and medical equipment.", " a large steel building with a pool."], "sample_ids": ["a10f3196831647bc8842eacac640047d", "72eed67d8d884c819b28e2e95eb48f06"], "properties": ["color, white, containers", "building material, pool, steel"], "captions_pred_pc": ["a black and white illustration of the letter 'f' made up of tiny dots", "a black and white illustration of a building with a tree growing out of it"], "captions_pred_image": ["a 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a 
hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room", "a 3d model of a concrete structure"], "question": "which building is made of steel", "label": 1}, {"captions": [" a castle on an island with a small floating house, trees, and clouds.", "a featuring a small room with a bunk bed, desk, chair, table, and a blue house."], "sample_ids": ["c4c09479570943e2845fbd4c6a450568", "dd3a9323ed514ccab330973ff9588015"], "properties": ["castle, island, house", "room, bed, desk"], "captions_pred_pc": ["above a black and white illustration of a group of dots in the shape of a circle", "a black and white drawing of a door"], "captions_pred_image": ["a 3d model of a small house on an island", "a 3d model of a small room with a bunk bed"], "question": "which entity has a desk?", "label": 1}, {"captions": ["a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["8557a15b9f244d2cbf16786dbc8b7b25", "4a889132cc444d10bfcbf6c760984416"], "properties": ["building, room, sky", "a, color, white"], "captions_pred_pc": ["above a black and white image of a person's hand holding a paintbrush", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d rendering of white clouds floating in the air", "a 3d model of a desk and chair"], "question": "which entity has a white box?", "label": 1}, {"captions": ["yellow and white 3d corn on the cob model resembling a toothbrush.", " of a house with a roof and roof framing, 
featuring a building with a yellow roof."], "sample_ids": ["1fd0e7ffe26349da89815a6d4d6a189a", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["color, shape, material", "roof, color, yellow"], "captions_pred_pc": ["a black and white illustration of a circle made up of tiny dots", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a corn on the cob 3d model of a", "a 3d model of a house with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" a bicycle near a wall.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["c527acbe4e53402497207050a8115676", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["A, wall, bicycle", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["above a black and white image of an airplane flying in the sky", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a black and white image of a bicycle leaning against a wall", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": [" of a blue tarp, flower, small island with a boat and phone, and a tent with 
a blue blanket.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["94704d86c22c4bdfb86ac24979926066", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["blue tarp, flower, small island", "mountainous, landmass, state"], "captions_pred_pc": ["above a 3d rendering of a fish in the air", "a black and white map of the state of new york"], "captions_pred_image": ["a black and white image of a piece of paper on the ground", "a 3d model of a piece of paper"], "question": "which entity is a mountainous landmass", "label": 1}, {"captions": [" a cartoon character wearing a hat, holding a bag, a baseball bat, and a blanket, resembling a toy animal.", "a pair of black armored warriors with swords, spears, and cloaks, including a female warrior and a dark knight."], "sample_ids": ["7e7272f3ddc24551905eccb63f3da42e", "46511f6f0fd04198b4005c159ac8ae40"], "properties": ["hat, bag, blanket", "cloak, sword, spear"], "captions_pred_pc": ["a black and white illustration of a cell with dots", "a black and white photo of a pair of snowflakes on a white background snowflakes on a white background royalty free stock photography"], "captions_pred_image": ["a 3d printed sculpture of a baseball player holding a bat", "a 3d model of a pair of armored knights standing next to each other"], "question": "which entity has a cloak?", "label": 1}, {"captions": [" a white building with a square ceiling panel and a white 3d printed plane on top.", "\"multiple white cubes arranged in a row on a gray background.\""], "sample_ids": ["eb3ea0e6963f4efda2a8cf0732befd56", "17c8222d4ce04e518117078e7de6aaed"], "properties": ["- material is 3d printed, ceiling panel is square, plane is white", "color, background, white"], "captions_pred_pc": ["above a black and white drawing of a cross", "a black and white image of a box with the words 'box 2' and 'thoughtful'"], "captions_pred_image": ["a 3d model of a building on a white surface", "an image of a 
white background with a few small cubes on it"], "question": "which object is white", "label": 0}, {"captions": ["a featuring a staircase, small red and white building, red box, table, and ceiling light.", "3d white playground set with slides and swings, featuring a plane, building, and dragon in the sky."], "sample_ids": ["11e2e8ca1f8849e394dfbf532c6d7ae0", "e694d53545d449319a64cceb0280c3c6"], "properties": ["a, building, staircase", "3d, slide, swing"], "captions_pred_pc": ["for a black and white photograph of a piece of metal", "for a 3d model of the letter 'j'"], "captions_pred_image": ["a 3d model of a staircase in the middle of a floor", "a 3d model of a playground slide"], "question": "which entity has a plane", "label": 1}, {"captions": [" of a blue and white shelf, cube, and tube.", " of a clear glass display case with a square light fixture."], "sample_ids": ["a4dd3e2c48224cff9ac2a2b0c813f06a", "980ded888795417f94b46750105e1597"], "properties": ["color, shape, material", "light, fixture, square"], "captions_pred_pc": ["for a black and white photo of a person sitting on a bench", "in 15 words or less a black and white illustration of a room with dots on the floor"], "captions_pred_image": ["a 3d rendering of a gray and white shelf", "a 3d model of a clear plastic box"], "question": "which object is made of glass", "label": 1}, {"captions": ["a white teddy bear, cloud, skull, octopus, and bird in various positions on a gray background.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["dd5849aced0443b1b4b38d413f7e06c4", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["background, color, white", "table, staircase, light"], "captions_pred_pc": ["a black and white image of a cat's head", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d model of an animal skull in white on a gray background", "a 3d model of a table with a staircase"], "question": "which entity has a light?", 
"label": 1}, {"captions": ["a 3d white box with black trim, stripes, and handles.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["55b26130f1514032be078e13fd982905", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["color, shape, material", "table, staircase, light"], "captions_pred_pc": ["a black and white drawing of a square made up of dots", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d model of a white box with a black handle", "a 3d model of a table with a staircase"], "question": "which object is made of wood", "label": 1}, {"captions": ["a collection featuring a black umbrella, a hammer, a door, and an axe, all with wooden handles.", " a silver, cylindrical object resembling a pair of chopsticks or a tall, thin pole."], "sample_ids": ["c6e87b303e9945a3b0fc282e8527b473", "6823de4492ef4b5b835c7468d53e0325"], "properties": ["collection, color, black, handle, wooden", "shape is cylindrical, color is silver, material is metal"], "captions_pred_pc": ["a black and white photo of a person in a suit and tie standing in front of a white wall", "a black and white image of a cone shaped object"], "captions_pred_image": ["a black and white image of a door handle and a knife", "a 3d model of a tall, slender object in the air"], "question": "which object is made of metal", "label": 1}, {"captions": ["a 3d wooden pole with a metal spear handle.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["1d5cf234576e41f0ba209c5e19d2db47", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["- material is wood, metal, metal", "a, material, clay"], "captions_pred_pc": ["of a black and white illustration of a vase on a pedestal", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a black and white image of an old-fashioned pitchfork", "a sculpture of a man's head on a pedestal"], "question": "which object is made of clay", "label": 1}, 
{"captions": [" a small white archway structure resembling a building.", " a stone throne with stairs and a tree, accompanied by a concrete wall featuring a statue and a fireplace with a shelf above it."], "sample_ids": ["5ad02458cf394134a902e25001d2ffef", "93fb4197f0014f7582029af24c7ed9de"], "properties": ["structure, building, archway", "throne, stairs, tree"], "captions_pred_pc": ["for a black and white illustration of a castle on a hill", "in 15 words or less a black and white image of a toilet paper roll on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a white object on a white surface", "a 3d model of a throne with a tree on it"], "question": "which entity is a throne?", "label": 1}, {"captions": [" a small white building featuring a room with a door and a white shelf.", " a small white house with a roof."], "sample_ids": ["d7b78fa9a6b64f6095b881bc619b04fe", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["room, door, shelf", "roof, color, white"], "captions_pred_pc": ["above a black and white illustration of a person standing in front of a door", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a 3d model of an empty room", "a 3d model of a building with a white roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a molecule featuring green, red, and blue spheres.", "a featuring a city, blue and purple lamp, purple and yellow spaceship with three spheres, and a purple and blue castle."], "sample_ids": ["1c0e821eb7c4489dbff9e20d7e8575a3", "097c2f404b6c45c98da55ed95cc24f72"], "properties": ["color, sphere, molecule", "spheres, lamp, city"], "captions_pred_pc": ["a black and white photograph of a group of geometric shapes arranged in the shape of a diamond", "a black and white photograph of a turtle in the shape of a molecule"], "captions_pred_image": ["a 3d model of a molecule in the shape of a pyramid", "a 3d model of a futuristic spaceship"], 
"question": "which entity has more spheres", "label": 1}, {"captions": [" a small, modern house with a green roof, located on a hill, surrounded by trees, grass, and a pond.", " of two rocks with ice elements."], "sample_ids": ["a452d5381dad4dc09f5ebe10635ae5fe", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["house, roof, green", "image is a rock with ice elements"], "captions_pred_pc": ["above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a building with a black roof", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", "a featuring a small boat, a rock with a hole, and blue water."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", 
"7ccdffc0d6404e8d9144260255ea0c5c"], "properties": ["a box, a cup, a bottle, a jar", "water, boat, rock"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "a black and white illustration of a surfboard"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a 3d image of an animal laying on the ground"], "question": "which entity has a boat?", "label": 1}, {"captions": [" a white box with a gold handle, featuring a spring inside and functioning as a kitchen utensil holder or electrical box, with gold details.", "s of a cat, fish, person with a hat, pig with a green hat and swimsuit, and a green and pink bird."], "sample_ids": ["ca275639b47a4093b4426d304695af7f", "402601779d1d4146b4cde106dfff1b27"], "properties": ["holder, spring, box", "s, cat, fish, person, pig, bird"], "captions_pred_pc": ["in 15 words a black and white illustration of a square with dots on a white background abstract illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a black and white illustration of a square with dots on a white background illustration of a 
black and white illustration of a square with dots on a white", "above a black and white photo of a toy octopus on a white background"], "captions_pred_image": ["a 3d model of a plastic container with a spring inside", "a snowflake in the air on a cloudy day"], "question": "which entity is a picture of a cat", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["roof truss, insulation, suspended ceiling", "roof, structure, greenhouse"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "a 3d illustration of a window with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of a building with a roof"], "question": "which roof is more complex", "label": 1}, {"captions": [" a small white building featuring a room with a door and a white shelf.", " of a small wooden house with a blue roof, featuring a police sign and resembling a lighthouse."], "sample_ids": ["d7b78fa9a6b64f6095b881bc619b04fe", "9b2c93d651c3409096118c5ce5b993f2"], "properties": ["room, door, shelf", "house, roof, blue"], "captions_pred_pc": ["above a black and white illustration of a person standing in front of a door", "a black and white illustration of a coffee mug on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of an empty room", "a 3d model of a small house and barn"], "question": "which house has a blue roof", "label": 1}, {"captions": ["a white ornate airplane design.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["23ec5bf5dd154e4a9df3194da7b8267a", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["color, 
shape, material", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["of a black and white image of an ornate door knocker", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d model of a decorative ornament on a white background", "a white 3d model of a city skyline"], "question": "which entity is a photograph?", "label": 1}, {"captions": [" a large, rusty, square-shaped metal pillar resembling a rusted tower.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["b5afccae993346079483507296fbb029", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["shape is square, material is metal, color is rusty", "mountainous, landmass, state"], "captions_pred_pc": ["above a black and white image of a square frame with dots", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d model of a concrete column", "a 3d model of a piece of paper"], "question": "which entity is a landmass", "label": 1}, {"captions": [" of a wooden staircase with marble floor and wooden railings in a house.", " a wooden object, including a board, piece of wood, box, and shelf."], "sample_ids": ["ee70964ce5e841bd87381cff40d59b88", "c986212445a1466ca7be7b5ac6bea729"], "properties": ["floor, staircase, railings", "wood, board, shelf"], "captions_pred_pc": ["a black and white drawing of a light switch", "a black and white drawing of snowflakes on a white background"], "captions_pred_image": ["a 3d model of a staircase on a marble surface", "a 3d rendering of a piece of marble"], "question": "which object is made of wood", "label": 1}, {"captions": ["a featuring a small island with trees, mountains, a house on a hill, a large building, and a boat floating in the water.", " a mountainous landmass, resembling a small island or state like kentucky or wyoming."], "sample_ids": ["37bdbc633c9545878a98ff47c3029e32", "8ca9b999b69c4965bd9eb4445d605bf2"], "properties": ["a, island, 
water", "mountainous, landmass, state"], "captions_pred_pc": ["a black and white photo of a boat in the water", "a black and white map of the state of new york"], "captions_pred_image": ["a 3d model of a building with trees surrounding it", "a 3d model of a piece of paper"], "question": "which entity is a mountainous landmass?", "label": 1}, {"captions": [" a yellow monster-sphere with teeth.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["fc5f906bab2c4c36a406ebdc15f58541", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, shape, texture", "a, material, clay"], "captions_pred_pc": ["a black and white illustration of a dandelion on a white background dandelion illustration on a white background illustration", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d model of a white ball with teeth", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": ["white korean word on a gray background.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["b0031bafaeff45e4bbb1c01721cf5b9e", "b896a0898efe4059a776193c02132129"], "properties": ["color, background, font", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white photo of a group of pipes in the shape of the letter 'o'", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a person holding a pair of chopsticks in front of a gray background", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a red and brown rock formation with a broken tree branch.", "a clay sculpture of a man's head on a wooden block."], "sample_ids": ["c29d48d320c04ed1bf5aafe0a3df3d78", "52b7f2034d0044bb82cd714d6f7651d5"], "properties": ["color, shape, texture", "a, material, clay"], "captions_pred_pc": ["a black and white 
silhouette of an island on a white background", "a black and white image of a man's head made up of dots"], "captions_pred_image": ["a 3d image of a rock formation on a snowy surface", "a sculpture of a man's head on a pedestal"], "question": "which entity is made of clay", "label": 1}, {"captions": [" a small, rocky island with diverse terrain and scattered rocks.", "a 3d wooden toy castle with red and yellow towers, a red roof, and a man inside."], "sample_ids": ["09f2cf267e954c958828325067bcc36a", "311f6655ed854899b07ea10f3613ef7a"], "properties": ["island, terrain, rocks", "a, color, red"], "captions_pred_pc": ["above a black and white photo of a small island in the middle of a body of water", "a black and white drawing of a wallet on a white background"], "captions_pred_image": ["a black and white image of a piece of dirt on the ground", "a 3d model of a castle with two towers"], "question": "which entity has a red roof", "label": 1}, {"captions": [" a snowy small village with farm buildings and a fence.", " a snowy small village with farm buildings and a fence."], "sample_ids": ["6bb669534ccc434f9ab4d7b39bae3510", "6bb669534ccc434f9ab4d7b39bae3510"], "properties": ["building, fence, snowy", "building, fence, snowy"], "captions_pred_pc": ["a black and white drawing of a boat on the water", "a black and white drawing of a boat on the water"], "captions_pred_image": ["a 3d model of a small village in the snow royalty free 3d model preview no. 3", "a 3d model of a small village in the snow royalty free 3d model preview no. 
3"], "question": "which entity has a fence?", "label": 0}, {"captions": ["a low poly of a deer, antelope, llama, capybara, and kangaroo.", "a white of a man with arms outstretched."], "sample_ids": ["8b4c2e3e76524d85a9395ea1169d953e", "84afafb3f8f04499bd77d4e7cbc40fb7"], "properties": ["low poly, llama, kangaroo", "image, color, white"], "captions_pred_pc": ["above a black and white image of an animal sculpture", "of a black and white silhouette of a person holding an umbrella"], "captions_pred_image": ["a 3d low poly animal standing on its hind legs", "a 3d model of a man with his arms outstretched"], "question": "which image is black and white?", "label": 1}, {"captions": [" a white rock with green grass and moss on it.", "green toy sand bucket and shovel with a squirt gun."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "ae173b4afc4d4b0499f1e4e55d647c06"], "properties": ["color, grass, moss", "color, green, squirt gun"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "a bucket with a sponge and a sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in the shape of a sponge sponge brush in"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a 3d model of a bucket and shovel royalty free 3d model preview no.2"], "question": "which object is made of plastic", 
"label": 1}, {"captions": [" a metal-framed wall with red and blue bars in a steel structure.", "a white of a woman with her arms outstretched."], "sample_ids": ["fefc99453e2d4406a9668d5697224c0f", "2cbfaa4fb2d84a6f94e67b4fd6e2f26f"], "properties": ["color, red, blue, structure", "image, color, white"], "captions_pred_pc": ["a black and white image of a person holding a toothbrush", "a black and white silhouette of a person on a skateboard"], "captions_pred_image": ["a 3d rendering of a metal frame structure", "a 3d model of a woman with her arms outstretched"], "question": "which entity is a white image?", "label": 1}, {"captions": [" of a small blue and green gazebo with a table and chairs.", "a featuring a lamp, harp, white bowl, and white curved wall."], "sample_ids": ["0a3d553ed5d54c9794494af4f7a7e1c6", "55bcec23e1b34f0d9d748b4dcc3ea123"], "properties": ["color, gazebo, table", "lamp, harp, bowl"], "captions_pred_pc": ["a black and white illustration of a gazebo in the middle of a field of polka dots stock photography \u00a9 2018 iStock", "a black and white illustration of a curved line"], "captions_pred_image": ["a 3d model of a small gazebo with a fountain in the center", "a 3d model of a harp in a white room"], "question": "which entity has a lamp", "label": 1}, {"captions": [" a small white closet-like structure with a door, resembling a kitchen cabinet or miniature house.", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["37954d951eb24f23a4956df5a683bb92", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["door, white, structure", "door, lock, handle"], "captions_pred_pc": ["a black and white drawing of a house made up of tiny dots", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a 3d model of a modern, minimalist bathroom", "a black and white image of a door with a crack in it"], "question": "which door has a lock", "label": 1}, {"captions": 
["a featuring a mossy rock, a piece of wood, a rocky island, a plane, a tree branch, a gray rock, a rocky mountain, and a moss-covered tree branch.", " a white plastic container with a lid, a small box, a cup, a bottle, and a jar."], "sample_ids": ["09e5288a9e98421985ee6e0042b3c325", "20a02705a66f460492e07345e84a62ed"], "properties": ["mossy, rock, rocky", "a box, a cup, a bottle, a jar"], "captions_pred_pc": ["a black and white illustration of a small island in the middle of a body of water", "a black and white pattern of dots on a white background"], "captions_pred_image": ["a 3d model of the comet 67p/churyumov-gerasimenko", "a 3d model of a plastic bottle, a plastic cap, and a plastic container"], "question": "which entity is not a jar?", "label": 1}, {"captions": [" a house featuring a wooden roof structure with trusses and beams.", " of a small white building with stairs and a lid."], "sample_ids": ["97e000ff41094665afd94ea565da8b13", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["roof, material, wood", "building, stairs, lid"], "captions_pred_pc": ["a black and white drawing of a floor plan", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of a white box on a gray background"], "question": "which building has a lid?", "label": 1}, {"captions": [" a cityscape featuring various buildings, trees, an airport runway, and a plane.", " a spider-like creature with long arms and legs."], "sample_ids": ["9b45036d3f0342a78db9a25938dc77fc", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["building, tree, plane", "arachnid, leg, arm"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake in the shape of a snowflake royalty free illustration", "a black and white illustration of a spider on a white background"], "captions_pred_image": ["a black and white image of an industrial area with buildings 
and trucks in the foreground", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": ["a featuring a small room with a table, chair, laptop, and a white box containing a teddy bear and a piece of paper.", " a house with a roof and beams."], "sample_ids": ["24f7d0a06d494c26a1678d81b2b7b093", "c139e8fcbdbc4f63a9c8a06152c4dc2e"], "properties": ["a, laptop, teddy bear", "roof, beams, house"], "captions_pred_pc": ["a black and white drawing of an umbrella on a white background", "in 15 words or less a black and white illustration of a 3d object in the shape of a square"], "captions_pred_image": ["a 3d rendering of a white room with various objects", "a 3d model of a building with a roof"], "question": "which entity is a building?", "label": 1}, {"captions": [" a white plastic ring, resembling a car fender or mouth guard.", "a white ceramic vase with the words 'happy' and 'fish' written on it."], "sample_ids": ["f76460ad2a1a4fffb370d4556c405c6e", "243cd2c469984313b1522dca099eefd3"], "properties": ["size, material, color", "color, white, material, ceramic"], "captions_pred_pc": ["of a black plastic ring on a white background", "a black and white image of a vase in the shape of a fish"], "captions_pred_image": ["a 3d rendering of a white plastic object on a gray background", "a white vase on a grey background"], "question": "which object is made of ceramic", "label": 1}, {"captions": [" a wooden wall with a psychedelic circular pattern in red, green, and blue.", " a small house with a tree, pool, and pond in a green and blue landscape."], "sample_ids": ["5376daee484349378cb269a771ca5be0", "e22489ba182f45cb81f0a83f22abe9bd"], "properties": ["color, pattern, wall", "house, tree, pool"], "captions_pred_pc": ["of a cross made of black yarn on a white background", "in 15 words or less a black and white image of a square with dots around it"], "captions_pred_image": ["a 3d model of a black square with a circular 
pattern on it royalty free 3d model preview no.1", "a 3d model of a house and a tree in a box royalty free 3d model preview no.3"], "question": "which entity has a pool", "label": 1}, {"captions": [" of a gray stereo system with a blue drawer, resembling a printer and computer with a blue screen, featuring a cd player.", " of a white chest of drawers with legs."], "sample_ids": ["3d4d965e67744415b69ff6aaeb11f420", "f00dfa8b5e7e4fc6bbf97d718b66f390"], "properties": ["color, screen, drawer", "chest of drawers, legs, white"], "captions_pred_pc": ["above a black and white image of a brush", "of a black and white leopard print rug"], "captions_pred_image": ["a 3d model of a printer on a white background royalty free 3d model no.2", "a 3d rendering of a white dresser"], "question": "which chest of drawers has legs", "label": 1}, {"captions": [" a house with a yard, trees, bushes, and surrounding buildings.", " a large, black and white circular building, resembling a stadium or ring structure."], "sample_ids": ["7f8942ef51dd4246993a587a12df168c", "67f46bb0048244c687a58d1017a08f6b"], "properties": ["house, yard, surrounding buildings", "building, color, black and white"], "captions_pred_pc": ["a black and white image of a truck on a white background", "the letter c in black and white illustration of the letter c in black and white illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white 
background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustration of the letter c isolated on a white background illustr"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a circular fence with black and white stripes"], "question": "which building is black and white", "label": 1}, {"captions": [" a small house with a yellow roof and chimney.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["0056e85a243b47a08ddbcd36816cb6ae", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["roof, yellow, chimney", "house, roof, wooden"], "captions_pred_pc": ["a black and white illustration of a house made up of dots on a white background a black and white illustration of a house made up of dots on a white background royalty free illustration", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a small house royalty-free 3d model preview no.2", "a black and white photograph of a birdhouse"], "question": "which house has a wooden roof", "label": 1}, {"captions": [" of a wooden building frame with truss and roof structure.", " of a house with a roof structure, including a greenhouse."], "sample_ids": ["1313f8185cf24f3bbd73ff4e4ddfab3e", "d7483292784b4e2b81df1c50f2a8664a"], "properties": ["frame, truss, roof", "roof, structure, greenhouse"], "captions_pred_pc": ["a black and white image of a ladder on a white background", "a 3d illustration of a window 
with dots on a white background 3d illustration of a window with dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d rendering of a bridge over a road", "a 3d model of a building with a roof"], "question": "which entity has a roof structure", "label": 1}, {"captions": [" of a house with multiple roofs and a suspended ceiling, featuring a white and purple tray.", " of a small wooden house with two roofs."], "sample_ids": ["7907916e8f524df5b16eb566497db83e", "30fc23ae4edb42609e30e029dede54bd"], "properties": ["color, roof, tray", "house, roof, wooden"], "captions_pred_pc": ["a black and white image of a metal object", "of a pair of stainless steel screws on a white background"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of a small barn"], "question": "which house has a wooden roof", "label": 1}, {"captions": [" a small house with a tree, pool, and pond in a green and blue landscape.", " a spider-like creature with long arms and legs."], "sample_ids": ["e22489ba182f45cb81f0a83f22abe9bd", "199bcb789e0c439bb2eeb32f2425cc36"], "properties": ["house, tree, pool", "arachnid, leg, arm"], "captions_pred_pc": ["in 15 words or less a black and white image of a square with dots around it", "a black and white illustration of a spider on a white background"], "captions_pred_image": ["a 3d model of a house and a tree in a box royalty free 3d model preview no.3", "a black and white image of an alien creature"], "question": "which entity has more legs", "label": 1}, {"captions": [" of a mannequin head wearing a leather plague mask with straps.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["7821b30c2f8545ccac3e8b8a305d5082", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["mannequin head, plague mask, straps", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["above a black and white image of a person's face with dots", "above a black and white image of 
a person sitting on a bench"], "captions_pred_image": ["a plague doctor's mask on a mannequin head", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": [" a wooden frame with gold trim, featuring a picture of a river surrounded by trees.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["25b86d7272e849f0967149243601bcbf", "bf18bfd89efd43389781050230467d58"], "properties": ["frame, picture, frame", "Lights, number, five"], "captions_pred_pc": ["a black and white patterned square on a white background", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a black and white photograph of a lake with trees in the background royalty free 3d model preview no.3", "a white chandelier with five white shades"], "question": "which object has more lights", "label": 1}, {"captions": [" a house with a roof, featuring roof truss, insulation, and a suspended ceiling.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["c1462fda08db4a769f68adae5c88cd43", "b896a0898efe4059a776193c02132129"], "properties": ["roof truss, insulation, suspended ceiling", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white drawing of an arrow pointing to the right", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a white bench with a grid pattern", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" of a rock formation with a white cliff and a rock.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["4a25f6dfbea943bca137dacd2f7b984f", "6b745457e06840119058883b35f78f58"], "properties": ["image is rock formation with a white cliff and a rock", "roof, color, blue"], "captions_pred_pc": ["above a black and 
white map of spain on a white background", "a black and white image of a building with dots"], "captions_pred_image": ["a black and white image of a rock formation on a gray background", "a 3d model of a house with a steeple on top"], "question": "which entity has a blue roof", "label": 1}, {"captions": ["a collection of various bottles, including accutane, vitamin c with a smiley face, activespray, jolt 3d, active kids, pills, and active junior, all containing different types of medicine.", "a 3d wooden toy castle with red and yellow towers, a red roof, and a man inside."], "sample_ids": ["bf76d14fcac24579920aa326fa607a17", "311f6655ed854899b07ea10f3613ef7a"], "properties": ["accutane, vitamin c, activespray, active kids, pills, active junior", "a, color, red"], "captions_pred_pc": ["a black and white illustration of a microscopic image of a virus on a white background royalty free stock illustration", "a black and white drawing of a wallet on a white background"], "captions_pred_image": ["a bottle of activated charcoal pills on a white background royalty free 3d model no.", "a 3d model of a castle with two towers"], "question": "which entity is not a toy?", "label": 0}, {"captions": [" a small white building featuring a room with a door and a white shelf.", "3d white playground set with slides and swings, featuring a plane, building, and dragon in the sky."], "sample_ids": ["d7b78fa9a6b64f6095b881bc619b04fe", "e694d53545d449319a64cceb0280c3c6"], "properties": ["room, door, shelf", "3d, slide, swing"], "captions_pred_pc": ["above a black and white illustration of a person standing in front of a door", "for a 3d model of the letter 'j'"], "captions_pred_image": ["a 3d model of an empty room", "a 3d model of a playground slide"], "question": "which entity has a plane", "label": 1}, {"captions": [" a house featuring a purple roof and a suspended ceiling with a light fixture.", " a small wooden house with a green roof, fire pit, wooden fence, bench, and a dog."], 
"sample_ids": ["579b43057ef74bd08d06bdd3e8d973a0", "f6c6e7a65a3e42dfa431b1d984c72f28"], "properties": ["roof, purple, suspended", "house, fence, dog"], "captions_pred_pc": ["a black and white image of a piece of paper with a pattern of dots on it", "above a black and white drawing of a bathroom"], "captions_pred_image": ["a 3d model of the roof of a building royalty free 3d model no.", "a 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a small cabin in the woods 3d model of a"], "question": "which house has a fence?", "label": 1}, {"captions": [" of a small house on a hill with a floating paper object and a curved metal piece.", " of two rocks with ice elements."], "sample_ids": ["000f67c78d9f409ca5dcc9d59f90287b", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["a, hill, paper, metal", "image is a rock with ice elements"], "captions_pred_pc": ["for a black and white image of an object on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a 3d model of a house on the side of a hill", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a spacious office 
featuring numerous desks, chairs, computers, and a ceiling with blue and white triangles and a ceiling light.", " of a wooden crate on a checkered floor with a ceiling light featuring a grid pattern."], "sample_ids": ["0d7e4d9471414a21b4a5b18a54f7ec22", "e832e6c497a041a0b8bde5418fa20a83"], "properties": ["ceiling, light, desks", "pattern, floor, light"], "captions_pred_pc": ["a black and white drawing of a square on a white background", "a black and white image of a patterned square"], "captions_pred_image": ["a 3d model of an office space with desks and chairs", "a 3d model of a small wooden box"], "question": "which entity has a ceiling light with a grid pattern", "label": 1}, {"captions": ["a 3d blue star featuring various text, including \"prchen,\" \"bible chen,\" \"rib chicken,\" and \"birch chen.\"", " a wooden door with a lock, handle, and a piece of paper on it."], "sample_ids": ["0b712fc68ad44ce8a33592d2b26aac18", "78762b19b7dc4823a0033ec63f092ca5"], "properties": ["color, shape, text", "door, lock, handle"], "captions_pred_pc": ["a black and white image of a pair of sunglasses", "in 15 words or fewer a silhouette of an airplane flying in the sky royalty free illustration"], "captions_pred_image": ["a white paper airplane flying over a gray background", "a black and white image of a door with a crack in it"], "question": "which door has a lock and handle", "label": 1}, {"captions": [" a pink-framed building structure with beams and trusses.", " of a house with a roof and roof framing, featuring a building with a yellow roof."], "sample_ids": ["18e392c5360146eda498c5edab25b15c", "44cc75e4baf4434daf96ec819cd86410"], "properties": ["frame, beams, trusses", "roof, color, yellow"], "captions_pred_pc": ["a black and white drawing of a metal grate", "a black and white illustration of a 3d object in the shape of a diamond on a white background abstract illustration of a 3d object in the shape of a diamond on a white background royalty free illustration"], 
"captions_pred_image": ["a 3d model of a building under construction", "a 3d model of a house with a roof"], "question": "which building has a roof that is the color of yellow", "label": 1}, {"captions": [" of a mushroom cloud with white tree and coral plant elements, featuring blue and green leaves.", " a robot with a blue, purple, and white body."], "sample_ids": ["bca5233d878e4cf09b5bc2bb6f3915b0", "6f98acb9e03c4cbd9c83f2c8f9cd3ddc"], "properties": ["color, shape, texture", "body, color, white"], "captions_pred_pc": ["a black and white image of a square with dots on it", "above a black and white image of a robot"], "captions_pred_image": ["a 3d model of a tree made out of white flowers", "a 3d model of a robot standing in the middle of a white background"], "question": "which entity has a white body?", "label": 1}, {"captions": [" a stone arrowhead with blue crystals and ice-like features.", " a small house with a tree and a rock."], "sample_ids": ["5f8c7eda0f464019a4acea243114555d", "9dc392a7f6e444e5bfb720684d6f864a"], "properties": ["- material is stone - color is blue - shape is arrowhead", "house, tree, rock"], "captions_pred_pc": ["above a black and white drawing of an arrow", "in 15 words or less the image depicts a white square on a white background with black dots all over the square stock illustration"], "captions_pred_image": ["a 3d model of a large piece of ice on a white background royalty free 3d model preview no 3", "a 3d model of a small house with a tree in front of it"], "question": "which object is made of rock", "label": 1}, {"captions": [" a pink room featuring a bed, desk, window, and lamp.", " of a white plastic tube with a hole and a chip on it."], "sample_ids": ["395af20de6fe49dbbbb030f0e452cbe1", "9968e06a62e8487ea33460e640abc573"], "properties": ["bed, desk, window", "color is white, material is plastic, shape is tube"], "captions_pred_pc": ["of a black and white drawing of a curved line", "a black and white image of a broom on a 
stand"], "captions_pred_image": ["a 3d model of a bedroom royalty free 3d model preview no.2", "a white object on a gray background"], "question": "which object is not a room?", "label": 1}, {"captions": [" a house featuring a wooden roof truss structure and ceiling with wood beams.", " a house featuring a roof with wooden trusses and a ladder."], "sample_ids": ["990f06da2ba4488da8371f68da6b4523", "3cd410c4359a4cef98702963a2b9802b"], "properties": ["roof truss, beams, structure", "roof, trusses, ladder"], "captions_pred_pc": ["a black and white illustration of a staircase in the shape of the letter 'l'", "a black and white drawing of a tv screen on a white background"], "captions_pred_image": ["a 3d model of a house with a wooden roof", "a 3d model of the roof of a building"], "question": "which house has a roof with wooden trusses and a ladder?", "label": 1}, {"captions": [" a bridge structure with a pier, stairway, railings, and red and green elements.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["23701a9674204ea881fb8af9914b1924", "4a889132cc444d10bfcbf6c760984416"], "properties": ["color, pier, railings", "a, color, white"], "captions_pred_pc": ["a black and white illustration of a computer keyboard", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a 3d model of a staircase in the middle of a field", "a 3d model of a desk and chair"], "question": "which entity has a white box?", "label": 1}, {"captions": [" a house featuring a wooden roof structure with trusses and beams.", "a featuring a flying plane, a destroyed plane, a large airborne ship, a building with a broken roof, and a broken piece of metal."], "sample_ids": ["97e000ff41094665afd94ea565da8b13", "4839e3b998ff4f6a84de50488ffae3ba"], "properties": ["roof, 
material, wood", "a, building, roof"], "captions_pred_pc": ["a black and white drawing of a floor plan", "for a black and white drawing of a person holding a pencil"], "captions_pred_image": ["a 3d model of the roof of a building", "a 3d model of the space shuttle"], "question": "which building has a roof", "label": 1}, {"captions": ["a featuring a skeleton, torn paper, long stick, rock, and broken wood.", " a floating small island with trees, grass, and a mountain."], "sample_ids": ["46903bf029934b1989bc062dcb0a5531", "95e0d4e2464b433dbb6c4d1d30e8150f"], "properties": ["skeleton, torn, paper, long stick, rock, broken wood", "island, mountain, grass"], "captions_pred_pc": ["a close up of a black object on a white background", "a black and white map of the island of malta"], "captions_pred_image": ["a 3d sculpture of a person's hand in the air royalty-free 3d model preview", "a 3d image of a small island in the middle of a lake"], "question": "which entity has grass?", "label": 1}, {"captions": [" of a small wooden house with two roofs.", " a house with a roof, roof truss, and suspended ceiling structure."], "sample_ids": ["30fc23ae4edb42609e30e029dede54bd", "5abf69f79b92484fb54d41ff0c0a2c11"], "properties": ["house, roof, wooden", "roof, truss, suspended ceiling"], "captions_pred_pc": ["of a pair of stainless steel screws on a white background", "a suitcase made of dots on a white background a suitcase made of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small barn", "a 3d model of a house with roof trusses"], "question": "which house has a roof truss", "label": 1}, {"captions": [" a blue circuit board with electronic components.", "a 3d wooden toy castle with red and yellow towers, a red roof, and a man inside."], "sample_ids": ["4816a2780af54492b6692fd78347f1ac", "311f6655ed854899b07ea10f3613ef7a"], "properties": ["color, blue, components", "a, color, red"], "captions_pred_pc": ["a black and white drawing of a 
person sitting on a bench", "a black and white drawing of a wallet on a white background"], "captions_pred_image": ["a 3d printed circuit board with various electronic components", "a 3d model of a castle with two towers"], "question": "which object is made of wood", "label": 1}, {"captions": [" of a white plastic tube or metal bar, resembling a knife.", " a small white house with a roof."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["a knife, blade, handle", "roof, color, white"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a piece of white plastic on a gray background", "a 3d model of a building with a white roof"], "question": "which object has a white roof", "label": 1}, {"captions": [" a small house with a pond and situated on a rock.", " a house with a wooden-framed roof structure."], "sample_ids": ["92859eb82a344134806b37cc209927c6", "ef2edaf670a64ec29114d7eeeaec7776"], "properties": ["house, rock, pond", "roof, material, wood"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a toaster", "a black and white drawing of a staircase"], "captions_pred_image": ["a 3d model of a house in the middle of a field", "a 3d model of a building with a roof"], "question": "which house has a wooden-framed roof structure", "label": 1}, {"captions": ["s of a cat, fish, person with a hat, pig with a green hat and swimsuit, and a green and pink bird.", " of two rocks with ice elements."], "sample_ids": ["402601779d1d4146b4cde106dfff1b27", "a6e002dc07f94f65adc7a650de87d9f3"], "properties": ["s, cat, fish, person, pig, bird", "image is a rock with ice elements"], "captions_pred_pc": ["above a black and white photo of a toy octopus on a white background", "a black and white image of two rocks on a white background"], "captions_pred_image": ["a snowflake in the air on a 
cloudy day", "a 3d image of two rocks on a gray surface"], "question": "which image is a rock with ice elements?", "label": 1}, {"captions": [" a colorful building with red and blue blocks, a yellow roof, and hanging from the ceiling.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["2a31c42de6f74ddba6b19b3467066e11", "c3a82df41875402285608ef13a55df57"], "properties": ["color, roof, block", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white drawing of a room with a lot of dots", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a 3d model of a table with multiple tables stacked on top of each other", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a room featuring a table and chairs, with blue and green walls.", " a modern house with a pool, balcony, and a small white box with a hole in it."], "sample_ids": ["a49899d9a6194583b745e02f3654841e", "795cebc8a9bd4780aa44c8dbccfd6d1a"], "properties": ["color, table, chairs", "house, pool, balcony"], "captions_pred_pc": ["a close-up of a white object on a white background", "above a black and white image of a room with a lot of dots"], "captions_pred_image": ["a 3d rendering of a room with a white wall", "a 3d model of a modern house"], "question": "which entity has a pool", "label": 1}, {"captions": [" a castle on an island with a small floating house, trees, and clouds.", " a small, snow-covered house."], "sample_ids": ["c4c09479570943e2845fbd4c6a450568", "0d00d10b90134dbe9ce7b2b3d6669237"], "properties": ["castle, island, house", "house, snow, cover"], "captions_pred_pc": ["above a black and white illustration of a group of dots in the shape of a circle", "in 15 words or less a black and white image of a piece of paper on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small house on an island", "a piece 
of broken glass on a white background"], "question": "which house is covered in snow", "label": 1}, {"captions": [" a house featuring a purple roof and a suspended ceiling with a light fixture.", " a house featuring a detailed roof structure and a suspended ceiling with a map on it."], "sample_ids": ["579b43057ef74bd08d06bdd3e8d973a0", "ee7e6031912b46bc8ca7205a959c5c16"], "properties": ["roof, purple, suspended", "roof, structure, suspended"], "captions_pred_pc": ["a black and white image of a piece of paper with a pattern of dots on it", "a black and white image of a piece of lace"], "captions_pred_image": ["a 3d model of the roof of a building royalty free 3d model no.", "a 3d model of a house with a metal roof"], "question": "which roof is more detailed", "label": 1}, {"captions": [" of a cup, bottle, and two metal buckets on a chessboard with a square ceiling light fixture.", " a white rocking chair with a curved backrest."], "sample_ids": ["05b5a5da1a0a4c1fa60a9e5edd5c3424", "ee0deb90abf943b6894cd5ded1331213"], "properties": ["cup, bottle, chessboard", "backrest, curved, yes"], "captions_pred_pc": ["a black and white 3d shape made up of small dots on a white background", "a black and white illustration of a spiral pattern on a white background a black and white illustration of a spiral pattern on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a chess set on a checkered board royalty-free 3d model", "a 3d model of a white chair royalty free 3d model no. 
3"], "question": "which object has a curved backrest", "label": 1}, {"captions": [" a small house with a tree and a tree stump, featuring a roof and a square ceiling with a hole.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["b16fb21cda9a4a21a024df749c2304f4", "b896a0898efe4059a776193c02132129"], "properties": ["roof, ceiling, hole", "- material is stone, metal, concrete"], "captions_pred_pc": ["a black and white image of a square with dots on it", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d model of a small house and a tree in the foreground", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a house featuring a wooden roof truss structure and ceiling with wood beams.", " of a small wooden house with a roof."], "sample_ids": ["990f06da2ba4488da8371f68da6b4523", "f5904a9d87ff4fa688146c18c1f27fec"], "properties": ["roof truss, beams, structure", "roof, house, wooden"], "captions_pred_pc": ["a black and white illustration of a staircase in the shape of the letter 'l'", "a black and white drawing of a house with dots"], "captions_pred_image": ["a 3d model of a house with a wooden roof", "a 3d model of a small house"], "question": "which house has a wooden roof", "label": 1}, {"captions": ["a featuring a fireplace, wooden bench, sled, log, castle, and cat.", " of a white plastic tube with a hole and a chip on it."], "sample_ids": ["fe4cd6c3972940a5b7854ca1ebc8a5a3", "9968e06a62e8487ea33460e640abc573"], "properties": ["fireplace, bench, log", "color is white, material is plastic, shape is tube"], "captions_pred_pc": ["a black and white illustration of a snowflake", "a black and white image of a broom on a stand"], "captions_pred_image": ["a 3d model of a fireplace with snow on the ground", "a white object on a gray background"], "question": "which object is made of plastic", "label": 
1}, {"captions": ["a 3d object collection featuring a long stick, a grassy hill, a fish, a green and yellow knife, a small green and purple fish, a bird, and a crocodile.", " a small house with trees and plants, featuring a white box with a blue lid and a green triangle, accompanied by a christmas tree."], "sample_ids": ["856e5f6a854d4c608901f1e2b580344c", "4a889132cc444d10bfcbf6c760984416"], "properties": ["a, b, c", "a, color, white"], "captions_pred_pc": ["above a black and white drawing of a submarine", "a black and white illustration of a dandelion on a white background dandelion illustration on a white background stock illustration \u00a9 iStock/Getty Images"], "captions_pred_image": ["a black and white photograph of a fish on a gray background", "a 3d model of a desk and chair"], "question": "which object is white", "label": 1}, {"captions": [" a white rock with green grass and moss on it.", " of a wooden shelf featuring a laptop, sand dune, and ceiling with holes."], "sample_ids": ["0b53ca3fca7b4ef7bef27cb287daf32e", "6192773417e04fa49d2bf8b9eed20640"], "properties": ["color, grass, moss", "image is a shelf, laptop, sand dune"], "captions_pred_pc": ["above a black and white drawing of a long, curved line on a white background", "above a black and white image of a person sitting on a bench"], "captions_pred_image": ["a 3d sculpture of a piece of white chocolate in the shape of a map", "a white 3d model of a city skyline"], "question": "which image shows a laptop?", "label": 1}, {"captions": [" a room featuring a wall with a painting, a hole, and a door.", "a featuring a room with a staircase, a damaged bus and rv, two houses, a large white box, and destroyed buildings with debris."], "sample_ids": ["1d1328346a464d2482463d6d5288e934", "a47dcf3d3cf34c58af17b6715d6f1232"], "properties": ["painting, door, wall", "room, staircase, bus"], "captions_pred_pc": ["in one hundred words or less an illustration of an igloo on a white background stock illustration", "a 
black and white drawing of a room with dots"], "captions_pred_image": ["a black and white photograph of a torn piece of paper in the shape of a bird", "a 3d image of a building with a lot of debris"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" of a broken stone wall featuring an angel sculpture.", " of a small wooden house with a blue roof and clock tower."], "sample_ids": ["aae2c42740a04fd68068f5707111d26f", "6b745457e06840119058883b35f78f58"], "properties": ["image is a sculpture of an angel on a wall", "roof, color, blue"], "captions_pred_pc": ["a black and white image of a toilet paper roll", "a black and white image of a building with dots"], "captions_pred_image": ["a 3d model of a marble sculpture of an angel", "a 3d model of a house with a steeple on top"], "question": "which entity has a blue roof", "label": 1}, {"captions": [" of a japanese geisha in a metal frame with a triangular-shaped metal door.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["0a75511848d74b7f97422ad85a27ec2e", "b896a0898efe4059a776193c02132129"], "properties": ["frame, door, shape", "- material is stone, metal, concrete"], "captions_pred_pc": ["a line of black dots on a white background", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a framed picture of a geisha with a fan in her hand", "a 3d model of an ancient statue"], "question": "which object is made of metal", "label": 1}, {"captions": [" a house with a roof and white brick wall.", " a large white and metal building with a metal roof structure."], "sample_ids": ["00915b83a52b45d498962d0cd42af491", "0ee2683270b1486991f9b9ef12990a78"], "properties": ["roof, wall, color", "roof, metal, white"], "captions_pred_pc": ["a black and white image of a rectangle with dots all over it", "of a lamp with a black shade on a white background"], "captions_pred_image": ["a 3d model of a small 
white house with a roof", "a 3d model of a large white box"], "question": "which building has a white roof?", "label": 1}, {"captions": [" the earth featuring various elements such as temperature chart, blue and green stripes, blue arrow, exosphere label, england label, blue and purple stripes, and a blue flag.", " a small house with a blue roof."], "sample_ids": ["4945571db2d8467cb2aed8dd0d891c2e", "fa21afd3a99d448cb23fa527a784769c"], "properties": ["color, temperature, england", "roof, color, blue"], "captions_pred_pc": ["of a black and white photo of an airplane on a white background", "a house made of dots on a white background a house made of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of the earth with a rainbow in the sky", "a 3d model of a house with a porch and balcony royalty-free 3d model preview no.2"], "question": "which entity has a blue roof", "label": 1}, {"captions": ["a featuring a small desk and chair, table, staircase, bathroom with sink, and square ceiling light in a small house setting.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["1be04fc7b47c47e9aaa9d2720af16b87", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["desk, chair, table", "house, roof, wooden"], "captions_pred_pc": ["for a black and white image of a letter 'f'", "for a black and white image of an object on a white background"], "captions_pred_image": ["a 3d model of a staircase in a room", "a black and white photograph of a birdhouse"], "question": "which house has a wooden roof", "label": 1}, {"captions": [" of a hammer with a metal handle, compatible with 3ds max, maya, blender, and other ing and animation software.", " of a stone fountain with a lion statue, surrounded by an archway inspired by the arch of triumph."], "sample_ids": ["5d0b9c038a1847f29384ec09cffc43d9", "a72700696c3b44ef8101d1e71e914bc9"], "properties": ["- material is metal - weight is 0 - height is 0", "lion, 
statue, fountain"], "captions_pred_pc": ["a black and white illustration of a person flying a kite", "a black and white image of a metal object"], "captions_pred_image": ["a 3d rendering of a metal clamp with a spike on top of it", "a 3d image of a lion statue on a wall"], "question": "which object is made of stone", "label": 1}, {"captions": ["a small white bowl with a light blue glaze and blue rim.", " a white, curved plastic object resembling an ear, mouth guard, or hat."], "sample_ids": ["6f7201fbb58649379398a8d1d5c0cc7a", "c3a82df41875402285608ef13a55df57"], "properties": ["color, blue, rim, blue", "shape is curved, color is white, material is plastic"], "captions_pred_pc": ["a black and white drawing of a dotted circle on a white background", "a black and white drawing of a bird's wing"], "captions_pred_image": ["a white bowl on a gray background", "a white plastic object on a gray background"], "question": "which object is white", "label": 1}, {"captions": [" a white hospital operating room with blue containers and medical equipment.", " a small table with a staircase and a square ceiling light."], "sample_ids": ["a10f3196831647bc8842eacac640047d", "ef0cbf4628b74253b885480deefe6fbd"], "properties": ["color, white, containers", "table, staircase, light"], "captions_pred_pc": ["a black and white illustration of the letter 'f' made up of tiny dots", "for a black and white image of a small square on a white background"], "captions_pred_image": ["a 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a 
hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room 3d model of a hospital room", "a 3d model of a table with a staircase"], "question": "which entity has a staircase?", "label": 1}, {"captions": [" a house with roof trusses and wooden beams on a suspended ceiling.", " a cartoon panda bear wearing a diaper."], "sample_ids": ["3c2e3a3670b042069bd8290e2c357702", "e2c307d9fa2b4d40b4602537d7f71e24"], "properties": ["roof trusses, beams, suspended ceiling", "cartoon, bear, diaper"], "captions_pred_pc": ["above a black and white drawing of a building", "a 3d model of a teddy bear on a white background 3d model of a teddy bear on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a house with a roof in progress royalty free 3d model preview no. 
1", "a 3d model of a cute panda bear"], "question": "which entity is a cartoon?", "label": 1}, {"captions": [" of a set of three white shelves or ceiling panels with lines.", " of a small white building with stairs and a lid."], "sample_ids": ["ff6f8c2347ab4c17a2b3e319b205f191", "6ba301c579fa465fa454fe7487eb70cb"], "properties": ["set, white, lines", "building, stairs, lid"], "captions_pred_pc": ["a set of black and white dots on a white background royalty free illustration", "in 15 words or less a black and white drawing of a leopard on a white background royalty free illustration"], "captions_pred_image": ["a 3d illustration of a set of three white trays on a gray surface", "a 3d model of a white box on a gray background"], "question": "which object has a lid?", "label": 1}, {"captions": [" a house with a green roof and lawn.", " a small white barn with a metal roof."], "sample_ids": ["84adcf124ee742a49f7b1fe2104e072d", "4ca3342a96824684845f7d0e062ab176"], "properties": ["roof, green, lawn", "roof, metal, white"], "captions_pred_pc": ["a black and white image of a knife on a white background royalty free illustration", "in 15 words or less a black and white illustration of a house made of dots"], "captions_pred_image": ["a 3d model of a small apartment building royalty free 3d model preview no.2", "a 3d model of a barn"], "question": "which roof is made of metal", "label": 1}, {"captions": [" a small white closet-like structure with a door, resembling a kitchen cabinet or miniature house.", " a black and white striped box."], "sample_ids": ["37954d951eb24f23a4956df5a683bb92", "00fa8accaaad44c780efe0c04ed4a12b"], "properties": ["door, white, structure", "color, black, white"], "captions_pred_pc": ["a black and white drawing of a house made up of tiny dots", "in 15 words or less a black and white pattern on a white background"], "captions_pred_image": ["a 3d model of a modern, minimalist bathroom", "a 3d image of a black and white striped surface"], "question": 
"which entity is a box?", "label": 1}, {"captions": [" a cityscape featuring various buildings, trees, an airport runway, and a plane.", " a castle on an island with a small floating house, trees, and clouds."], "sample_ids": ["9b45036d3f0342a78db9a25938dc77fc", "c4c09479570943e2845fbd4c6a450568"], "properties": ["building, tree, plane", "castle, island, house"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake in the shape of a snowflake royalty free illustration", "above a black and white illustration of a group of dots in the shape of a circle"], "captions_pred_image": ["a black and white image of an industrial area with buildings and trucks in the foreground", "a 3d model of a small house on an island"], "question": "which entity has a castle?", "label": 1}, {"captions": ["a white hat, plastic cup with a lid, and a bowl.", " a small island featuring a large building, trees, and a house on a hill, surrounded by a forest."], "sample_ids": ["4a9d79b48eda4ad797a652ee01b1b026", "d557c62e9be741a6b0f6b204d11a9c6f"], "properties": ["hat, cup, bowl", "house, hill, forest"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a funnel", "above a black and white illustration of a small island in the middle of a body of water"], "captions_pred_image": ["a white plastic hat and bowl on a gray background", "a black and white image of a small island in the middle of a body of water"], "question": "which entity has a house on a hill?", "label": 1}, {"captions": [" a hand featuring red, green, and blue stripes.", "a green plastic box with two compartments, a green lid, and the words \"moddo fas so\" and \"modido\" printed on it."], "sample_ids": ["6ea21f8f91b04144b43aa4b606339de3", "9bcb7cc44b444326bc426cd9e2aacf60"], "properties": ["color, red, green, blue", "- material is plastic- color is green- shape is box"], "captions_pred_pc": ["above a black and white image of a glove", "a black and white illustration of a toilet 
brush and toilet brush holder"], "captions_pred_image": ["a 3d model of a white and gray glove", "a 3d rendering of a plastic box with several compartments"], "question": "which entity is made of plastic", "label": 1}, {"captions": [" a small white house with stairs and a spiral staircase, featuring a white table and ceiling light.", " a small house with a tree, pool, and pond in a green and blue landscape."], "sample_ids": ["e9e1cc7fae22458197a61f43a9c355f4", "e22489ba182f45cb81f0a83f22abe9bd"], "properties": ["house, staircase, table", "house, tree, pool"], "captions_pred_pc": ["above a black and white photograph of a dog in a frame", "in 15 words or less a black and white image of a square with dots around it"], "captions_pred_image": ["a 3d model of a small house with a spiral staircase", "a 3d model of a house and a tree in a box royalty free 3d model preview no.3"], "question": "which house has a tree", "label": 1}, {"captions": ["a white hat, plastic cup with a lid, and a bowl.", "a white hat, plastic cup with a lid, and a bowl."], "sample_ids": ["4a9d79b48eda4ad797a652ee01b1b026", "4a9d79b48eda4ad797a652ee01b1b026"], "properties": ["hat, cup, bowl", "hat, cup, bowl"], "captions_pred_pc": ["in 15 words or less a black and white illustration of a funnel", "in 15 words or less a black and white illustration of a funnel"], "captions_pred_image": ["a white plastic hat and bowl on a gray background", "a white plastic hat and bowl on a gray background"], "question": "which entity has a white hat", "label": 0}, {"captions": [" a small village featuring houses, trees, and a winding road.", " a small white house with a roof."], "sample_ids": ["7acf46c0265d4e39b97ac084852abde8", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["houses, trees, road", "roof, color, white"], "captions_pred_pc": ["in 15 words or less a black and white photo of a mountain landscape", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a black and 
white photograph of a small town", "a 3d model of a building with a white roof"], "question": "which entity has a roof", "label": 1}, {"captions": ["a gray background featuring a white line in the middle.", " a city with buildings, houses, trees, and grass."], "sample_ids": ["47f89f92bef14b7193d0ffa3934f6977", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["color, line, gray", "buildings, houses, grass"], "captions_pred_pc": ["above a black and white image of a piece of furniture", "in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["an airplane flying in the sky with the sun shining behind it", "an image of a pile of trash on the ground"], "question": "which entity has more grass", "label": 1}, {"captions": [" of a pillow featuring a hot dog shape and a bag of chips with a dragon design.", " a small wooden building with a roof, resembling a birdhouse or cabinet."], "sample_ids": ["4ae6ac813d584d12a5d5d608a595bfe5", "a60e4d5f34aa4a6280343a8f15bb1c13"], "properties": ["shape is hot dog, design is dragon, color is black", "house, roof, wooden"], "captions_pred_pc": ["a black and white illustration of an eye with dots", "for a black and white image of an object on a white background"], "captions_pred_image": ["a black and white photo of a pillow with a soda can on it", "a black and white photograph of a birdhouse"], "question": "which object is made of wood", "label": 1}, {"captions": [" a small white box with a shelf and a hole in it.", " of a small white dollhouse featuring furnished rooms, including a bedroom with a bed and desk, and a bathroom."], "sample_ids": ["d023ae78bc5a436eaba13c5ecdd45c56", "f178fb523ad7421aaa90a92ee736ee00"], "properties": ["a, hole, shelf", "bedroom, bathroom, bed"], "captions_pred_pc": ["a black and white drawing of a dotted square on a white background", "a black and white drawing of a room with dots"], "captions_pred_image": ["a 3d model of a 
white box on a gray background", "a 3d model of a small room with a bed, desk, and chair"], "question": "which entity has a bathroom?", "label": 1}, {"captions": [" a stone, wood, rock, sliced bread, and a skull with a blue hat.", " a tan, cylindrical hat with a hole in it."], "sample_ids": ["0169af65ffc64bbf8e2fe6c6de08d485", "00c350e9f5f24fcd8bcc378da2963d4c"], "properties": ["hat, skull, bread", "hat, color, tan"], "captions_pred_pc": ["a black and white illustration of a skull in the shape of dots", "a black and white drawing of a mushroom on a white background royalty free illustration"], "captions_pred_image": ["a black and white image of a stone sculpture", "a 3d model of an object with a hole in it"], "question": "which hat is tan", "label": 1}, {"captions": [" a small black and white cube.", " a white motorcycle with wings."], "sample_ids": ["fa7e884d363847619f89f8ee21fa8742", "7e684a7c012c4fd0ac91844f22457640"], "properties": ["color, shape, size", "color, white, wings"], "captions_pred_pc": ["a black and white drawing of a city skyline", "a black and white image of a pair of sunglasses"], "captions_pred_image": ["a 3d model of a black and white cube on a gray surface", "a 3d model of a motorcycle on a white background"], "question": "which object is white", "label": 1}, {"captions": ["3d low poly model of a small house on an island with trees.", "a white chandelier with six lamps, five lights, and glass shades."], "sample_ids": ["3b4db49a69bf48debf30b2693bd79c82", "bf18bfd89efd43389781050230467d58"], "properties": ["3d, model, house", "Lights, number, five"], "captions_pred_pc": ["a black and white illustration of an airplane flying in the sky", "a black and white illustration of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a small cabin in the woods", "a white chandelier with five white shades"], "question": "which entity has more lights", "label": 1}, {"captions": [" of a white plastic tube or 
metal bar, resembling a knife.", " a blue plastic stool."], "sample_ids": ["8fd3836862a44a8d8b4d224bfc30c2c3", "2ea7d871e2f64c7daa6977a8d268b45e"], "properties": ["a knife, blade, handle", "color, plastic, blue"], "captions_pred_pc": ["a black and white image of a shelf with a white background", "a black and white drawing of a chair with dots"], "captions_pred_image": ["a piece of white plastic on a gray background", "a 3d model of a white chair"], "question": "which object is made of plastic", "label": 1}, {"captions": [" a white plastic container with a lid, a small box, a cup, a bottle, and a jar.", "a 3d object featuring a rock, shell, piece of paper, and cat."], "sample_ids": ["20a02705a66f460492e07345e84a62ed", "53efab50e5a74e5ea165c763cea15be4"], "properties": ["a box, a cup, a bottle, a jar", "a, rock, paper"], "captions_pred_pc": ["a black and white pattern of dots on a white background", "for a flock of birds in the sky"], "captions_pred_image": ["a 3d model of a plastic bottle, a plastic cap, and a plastic container", "a black and white image of a piece of paper in the shape of an island"], "question": "which object has more paper", "label": 1}, {"captions": [" a city with buildings, houses, trees, and grass.", " a small house with a roof."], "sample_ids": ["bc649e19956041cf89c1572f1a33cff1", "9578e8de15ec44ce802072aaa4df3910"], "properties": ["buildings, houses, grass", "roof, house, small"], "captions_pred_pc": ["in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration", "above a black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and 
white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and white photograph of a small black and"], "captions_pred_image": ["an image of a pile of trash on the ground", "a 3d model of a small house"], "question": "which entity has a roof", "label": 1}, {"captions": [" a small house with a red roof.", " a small building with windows and a roof."], "sample_ids": ["085db9059b744673b5623b5338e02196", "0ef2cac27e364c0687afae7ab5040cc3"], "properties": ["roof, red, house", "roof, windows, building"], "captions_pred_pc": ["a black and white dotted square on a white background", "a black and white square made up of small dots on a white background"], "captions_pred_image": ["a 3d model of a small shed in the snow", "a 3d model of an apartment building royalty free 3d model preview no 3"], "question": "which building has a roof with windows?", "label": 1}, {"captions": [" a molecule featuring green, red, and blue spheres.", " a brick wall with grass."], "sample_ids": ["1c0e821eb7c4489dbff9e20d7e8575a3", "53f2d948091f417cb580e22469c94db2"], "properties": ["color, sphere, molecule", "brick, grass, wall"], "captions_pred_pc": ["a black and white photograph of a group of 
geometric shapes arranged in the shape of a diamond", "above a black and white illustration of an underwater scene"], "captions_pred_image": ["a 3d model of a molecule in the shape of a pyramid", "a black and white photo of a brick wall and a puddle"], "question": "which entity is a wall?", "label": 1}, {"captions": [" a white and black striped box resembling a stack of blocks or paper.", " a clear glass table with metal legs and balls on top."], "sample_ids": ["78246d66fd2e4e1195bc4536f4037862", "7c2bfa826f274377ac21f48d510848c3"], "properties": ["striped, white, black", "glass, metal, balls"], "captions_pred_pc": ["a black and white illustration of a pair of shoes on a white background vector illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a pair of shoes on a white background illustration of a", "a black and white image of a wine glass"], "captions_pred_image": ["a 3d model of a stack of books", "a clear acrylic foosball table"], "question": "which object is made of metal", "label": 1}, {"captions": [" a small, modern house with a green 
roof, located on a hill, surrounded by trees, grass, and a pond.", " a small house with a yellow roof and chimney."], "sample_ids": ["a452d5381dad4dc09f5ebe10635ae5fe", "0056e85a243b47a08ddbcd36816cb6ae"], "properties": ["house, roof, green", "roof, yellow, chimney"], "captions_pred_pc": ["above an illustration of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white image of a black and white", "a black and white illustration of a house made up of dots on a white background a black and white illustration of a house made up of dots on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a building with a black roof", "a 3d model of a small house royalty-free 3d model preview no.2"], "question": "which house has a yellow roof", "label": 1}, {"captions": ["a purple and yellow pixelated ethereum logo in pixel art style.", " a black triangular metal object with a clock and cross on it, and a small machine on top."], "sample_ids": 
["bcf111e592d64b6490003680cae9407f", "b198a81dc41c4fde8be3ca51c3b0e676"], "properties": ["color, style, pixel", "metal, cross, clock"], "captions_pred_pc": ["in 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 15 words or less 1", "above a black and white image of an object on a white background"], "captions_pred_image": ["an image of an electronic device with the letter 'z' on it", "a 3d model of a piece of furniture"], "question": "which object has a clock and cross on it", "label": 1}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " a large metal building with a roof and truss structure."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "b85a99699ccd4bcba213322113bb253d"], "properties": ["roof, color, blue", "roof, truss, structure"], "captions_pred_pc": ["of a white lace clutch purse on a white background", "of a metal grate on a white background"], "captions_pred_image": ["a 3d model of a building with many windows", "a 3d model of a long metal fence"], "question": "which roof is made of trusses", "label": 1}, {"captions": [" a green cube with a black base and a small robot sitting on top.", "a wooden-cased radio."], "sample_ids": ["c52f7e1f4b194d308b5ddde6deca3955", "c79d1be9b9a0478993bee32c63231a88"], "properties": ["color, shape, size", "case, material, wood"], "captions_pred_pc": ["in one line a 
black and white image of a dotted square on a white background royalty free illustration", "in 15 words or less a black and white drawing of a computer screen"], "captions_pred_image": ["a 3d model of a white box with a black base", "3d model of a vintage radio 3d model of a vintage radio"], "question": "which object is made of wood", "label": 1}, {"captions": [" a house with a blue roof, chimney, and wooden-beamed ceiling.", " a large building with a roof and windows."], "sample_ids": ["b380dd4800124a8d96424a504eb0ec6a", "32d1fbd3ee91426882290305f70021e6"], "properties": ["roof, color, blue", "roof, windows, building"], "captions_pred_pc": ["of a white lace clutch purse on a white background", "of a black and white photo of a diamond buckle"], "captions_pred_image": ["a 3d model of a building with many windows", "a 3d model of an apartment building royalty free 3d model preview no.2"], "question": "which building has a roof and windows", "label": 1}, {"captions": ["a featuring a building, a square, a cloud with a square in the middle, a group of people in a room and a field, and clouds floating in the sky.", " a stone statue featuring a woman with leaves and flowers, partially broken and surrounded by metal and concrete pieces."], "sample_ids": ["8557a15b9f244d2cbf16786dbc8b7b25", "b896a0898efe4059a776193c02132129"], "properties": ["building, room, sky", "- material is stone, metal, concrete"], "captions_pred_pc": ["above a black and white image of a person's hand holding a paintbrush", "of a white candle in the shape of a buddha"], "captions_pred_image": ["a 3d rendering of white clouds floating in the air", "a 3d model of an ancient statue"], "question": "which entity is made of stone", "label": 1}, {"captions": [" a destroyed building and a damaged yellow-green machine.", " a building structure featuring yellow and white poles, a yellow roof, and a suspended ceiling with yellow poles."], "sample_ids": ["01406e7034fe4b7da32494c6cbf260f2", 
"bd7aab78974643f5a0660c699daf8eb3"], "properties": ["building, color, yellow-green", "roof, color, yellow"], "captions_pred_pc": ["a black and white photograph of a piece of paper", "a black and white drawing of a room"], "captions_pred_image": ["a 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged letter 'h' 3d model of a damaged", "a 3d model of a table and chairs on a white background"], "question": "which building has a yellow roof", "label": 1}, {"captions": ["a small yellow and green sphere resembling saturn with a hat.", " a clay pot with holes in it."], "sample_ids": ["6811a2f3d1154dccb37d534ae673e673", "8ffcd902aa0a4efea55ca1dbab28f462"], "properties": ["color, shape, size", "hole, material, clay"], "captions_pred_pc": ["a black and white illustration of a dotted circle on a white background a black and white illustration of a dotted circle on a white background royalty free illustration", "a black and white illustration of a microscopic image of a virus on a white background illustration of a black and white illustration of a microscopic image of a virus on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of the planet saturn royalty free 3d model preview no.2", "a clay sculpture of a face with holes in it"], "question": "which object is made of clay", "label": 1}, {"captions": [" of a blue tarp, flower, small island with a boat and 
phone, and a tent with a blue blanket.", " a small white house with a roof."], "sample_ids": ["94704d86c22c4bdfb86ac24979926066", "5776bd692bd240cb9c14817c821d7c22"], "properties": ["blue tarp, flower, small island", "roof, color, white"], "captions_pred_pc": ["above a 3d rendering of a fish in the air", "a black and white drawing of a house on a white background"], "captions_pred_image": ["a black and white image of a piece of paper on the ground", "a 3d model of a building with a white roof"], "question": "which entity has a roof", "label": 1}, {"captions": [" a cube with a wooden ball on top.", " a city with buildings, houses, trees, and grass."], "sample_ids": ["874f49c0bdd04c9cbcc54fcac1a337ad", "bc649e19956041cf89c1572f1a33cff1"], "properties": ["shape is cube, material is wood, color is white", "buildings, houses, grass"], "captions_pred_pc": ["in one line a black and white image of a square made up of tiny dots", "in 15 words or less a black and white drawing of a snowflake on a white background royalty free illustration"], "captions_pred_image": ["a 3d model of a ball sitting on top of a cube", "an image of a pile of trash on the ground"], "question": "which entity is made of grass", "label": 1}, {"captions": [" of a tree stump and rock with flowers on them.", " a house with a pink roof, brick walls, and insulated ceiling."], "sample_ids": ["3f74af45aeeb43ee95e2c8a5e3afeae6", "c8936ace72954650b4e2d84246964849"], "properties": ["flower, rock, tree stump", "roof, color, pink"], "captions_pred_pc": ["above a black and white drawing of a flower on a white background", "a black and white drawing of a toilet"], "captions_pred_image": ["a 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the 
snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stump in the snow 3d model of a tree stum